Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 48 additions & 33 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,30 +21,18 @@ on:
alias:
required: true
type: string
max-runners:
num-shards:
required: true
type: number
description: "Number of shards benchmark is running on"
shard:
required: true
type: number
description: "Zero-based index of the shard this run executes (0 to num-shards - 1)"

jobs:
compute-matrix:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.gen.outputs.matrix }}
steps:
- id: gen
run: |
n="${{ inputs.max-runners }}"
shards=$(seq 0 $((n-1)) | paste -sd, -)
echo "matrix={\"shard\": [${shards}], \"num_shards\": [${n}]}" >> $GITHUB_OUTPUT

benchmark:
name: benchmark-${{ inputs.runtime-version }}-py${{ inputs.python-version }}-${{ inputs.alias }}
needs: compute-matrix

strategy:
fail-fast: false
matrix: ${{ fromJSON(needs.compute-matrix.outputs.matrix) }}
name: benchmark-${{ inputs.runtime-version }}-shard${{ inputs.shard }}-py${{ inputs.python-version }}-${{ inputs.alias }}

container:
image: ${{ inputs.image }}
Expand All @@ -59,6 +47,11 @@ jobs:
run:
shell: bash -l {0}

outputs:
benchmark-metadata: ${{ steps.gather-benchmark-metadata.outputs.benchmark-metadata }}
runners-info: ${{ steps.gather-runners-info.outputs.runners-info }}
dependencies: ${{ steps.gather-dependencies.outputs.dependencies }}

steps:
- name: Check out code
uses: actions/checkout@v4
Expand Down Expand Up @@ -129,8 +122,8 @@ jobs:
source .venv/bin/activate

KERNELS=("softmax" "geglu" "swiglu" "jsd" "welford" "kl_div" "int4_gemm" "layer_norm" "layer_norm-bwd" "rms_norm" "rms_norm-bwd" "cross_entropy")
NUMSHARDS=${{ matrix.num_shards }}
SHARD=${{ matrix.shard }}
NUMSHARDS=${{ inputs.num-shards }}
SHARD=${{ inputs.shard }}

SHARD_KERNELS=()
for ((i=0; i<${#KERNELS[@]}; i++)); do
Expand Down Expand Up @@ -203,19 +196,41 @@ jobs:
fi
cat "$TEST_REPORTS_DIR/helionbench.json"

- name: Authenticate with AWS
uses: aws-actions/configure-aws-credentials@v4
- name: Gather benchmark metadata
id: gather-benchmark-metadata
uses: pytorch/test-infra/.github/actions/gather-benchmark-metadata@main
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
# The max duration enforced by the server side
role-duration-seconds: 18000
aws-region: us-east-1
github-token: ${{ secrets.GITHUB_TOKEN }}
venv: .venv/bin/activate

- name: Upload the benchmark results to OSS benchmark database for the dashboard
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
- name: Gather runners info
id: gather-runners-info
uses: pytorch/test-infra/.github/actions/gather-runners-info@main
with:
benchmark-results-dir: test/test-reports
dry-run: false
schema-version: v3
github-token: ${{ secrets.GITHUB_TOKEN }}
venv: ".venv/bin/activate"
venv: .venv/bin/activate

- name: Gather dependencies
id: gather-dependencies
uses: pytorch/test-infra/.github/actions/gather-dependencies@main
with:
venv: .venv/bin/activate

- name: Upload the benchmark results to GitHub
uses: actions/upload-artifact@v4
with:
name: benchmark-results-${{ inputs.alias }}-${{ inputs.shard }}
path: test/test-reports

upload-benchmark-results:
needs: benchmark
uses: pytorch/test-infra/.github/workflows/upload_benchmark_results.yml@main
permissions:
id-token: write
contents: read
with:
benchmark-artifact: benchmark-results-${{ inputs.alias }}-${{ inputs.shard }}
benchmark-metadata: ${{ needs.benchmark.outputs.benchmark-metadata }}
runners-info: ${{ needs.benchmark.outputs.runners-info }}
dependencies: ${{ needs.benchmark.outputs.dependencies }}
schema-version: v3
dry-run: false
37 changes: 32 additions & 5 deletions .github/workflows/benchmark_dispatch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,17 @@ on:
- cron: '0 8 * * *' # Runs at midnight PST (8 AM UTC)

jobs:
run-h100:
gen-matrix-h100:
if: ${{ github.event.inputs.run_h100 == 'true' || github.event_name == 'schedule' }}
uses: ./.github/workflows/compute-benchmark-matrix.yml
with:
max-runners: 12

run-h100:
needs: gen-matrix-h100
uses: ./.github/workflows/benchmark.yml
strategy:
matrix: ${{ fromJSON(needs.gen-matrix-h100.outputs.matrix) }}
permissions:
id-token: write
contents: read
Expand All @@ -35,11 +43,20 @@ jobs:
runtime-version: cu129
container-options: --gpus all
alias: h100
num-shards: ${{ matrix.num_shards }}
shard: ${{ matrix.shard }}

gen-matrix-b200:
uses: ./.github/workflows/compute-benchmark-matrix.yml
if: ${{ github.event.inputs.run_b200 == 'true' || github.event_name == 'schedule' }}
with:
max-runners: 12

run-b200:
if: ${{ github.event.inputs.run_b200 == 'true' || github.event_name == 'schedule' }}
needs: gen-matrix-b200
uses: ./.github/workflows/benchmark.yml
strategy:
matrix: ${{ fromJSON(needs.gen-matrix-b200.outputs.matrix) }}
permissions:
id-token: write
contents: read
Expand All @@ -50,11 +67,20 @@ jobs:
runtime-version: cu129
container-options: --gpus all
alias: b200
max-runners: 12
num-shards: ${{ matrix.num_shards }}
shard: ${{ matrix.shard }}

run-mi325x:
gen-matrix-mi325x:
uses: ./.github/workflows/compute-benchmark-matrix.yml
if: ${{ github.event.inputs.run_mi325x == 'true' || github.event_name == 'schedule' }}
with:
max-runners: 6

run-mi325x:
needs: gen-matrix-mi325x
uses: ./.github/workflows/benchmark.yml
strategy:
matrix: ${{ fromJSON(needs.gen-matrix-mi325x.outputs.matrix) }}
permissions:
id-token: write
contents: read
Expand All @@ -65,4 +91,5 @@ jobs:
runtime-version: rocm7.0
container-options: --device=/dev/kfd --device=/dev/dri
alias: mi325x
max-runners: 6
num-shards: ${{ matrix.num_shards }}
shard: ${{ matrix.shard }}
24 changes: 24 additions & 0 deletions .github/workflows/compute-benchmark-matrix.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
name: Compute Benchmark Matrix

# Reusable workflow that turns a runner count into a JSON job matrix.
# For max-runners = n it outputs:
#   {"shard": [0,1,...,n-1], "num_shards": [n]}
# The output is a JSON string; callers are expected to decode it with
# fromJSON() before using it as a strategy matrix.

on:
  workflow_call:
    inputs:
      max-runners:
        description: "Number of shards to generate (positive integer)"
        required: true
        type: string
    outputs:
      matrix:
        description: "The generated matrix for sharding"
        value: ${{ jobs.gen.outputs.matrix }}

jobs:
  gen:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.gen.outputs.matrix }}
    steps:
      - id: gen
        shell: bash
        env:
          # Pass the input through the environment instead of expanding it
          # inside the script text, so its value is never parsed as shell code.
          MAX_RUNNERS: ${{ inputs.max-runners }}
        run: |
          n="${MAX_RUNNERS}"
          # Reject zero, negatives and non-numeric values up front: they would
          # otherwise produce an empty "shard" list or a shell arithmetic error.
          if ! [[ "${n}" =~ ^[1-9][0-9]*$ ]]; then
            echo "::error::max-runners must be a positive integer, got '${n}'"
            exit 1
          fi
          # Comma-separated shard indices 0..n-1.
          shards=$(seq 0 $((n-1)) | paste -sd, -)
          echo "matrix={\"shard\": [${shards}], \"num_shards\": [${n}]}" >> "$GITHUB_OUTPUT"
Loading