Move benchmark shard-matrix generation out of benchmark.yml.

benchmark.yml now receives `num-shards` and `shard` as inputs, exposes the
gathered metadata as job outputs, uploads its reports as an artifact and hands
them to the pytorch/test-infra upload workflow. benchmark_dispatch.yml builds
the shard matrix per hardware alias via the new compute-benchmark-matrix.yml.

Review fixes folded into this patch: the `shard` input gets its own
description, and every dispatch matrix keeps `fail-fast: false` as the removed
in-workflow strategy did. The `index` blob ids are those of the original patch.

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 87cd26f02..1beb3e10e 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -21,30 +21,18 @@ on:
       alias:
         required: true
         type: string
-      max-runners:
+      num-shards:
+        required: true
+        type: number
+        description: "Number of shards benchmark is running on"
+      shard:
         required: true
         type: number
-        description: "Maximum parallel runners to determine shards"
+        description: "Zero-based index of the shard this run executes"
 
 jobs:
-  compute-matrix:
-    runs-on: ubuntu-latest
-    outputs:
-      matrix: ${{ steps.gen.outputs.matrix }}
-    steps:
-      - id: gen
-        run: |
-          n="${{ inputs.max-runners }}"
-          shards=$(seq 0 $((n-1)) | paste -sd, -)
-          echo "matrix={\"shard\": [${shards}], \"num_shards\": [${n}]}" >> $GITHUB_OUTPUT
-
   benchmark:
-    name: benchmark-${{ inputs.runtime-version }}-py${{ inputs.python-version }}-${{ inputs.alias }}
-    needs: compute-matrix
-
-    strategy:
-      fail-fast: false
-      matrix: ${{ fromJSON(needs.compute-matrix.outputs.matrix) }}
+    name: benchmark-${{ inputs.runtime-version }}-shard${{ inputs.shard }}-py${{ inputs.python-version }}-${{ inputs.alias }}
 
     container:
       image: ${{ inputs.image }}
@@ -59,6 +47,11 @@ jobs:
       run:
         shell: bash -l {0}
 
+    outputs:
+      benchmark-metadata: ${{ steps.gather-benchmark-metadata.outputs.benchmark-metadata }}
+      runners-info: ${{ steps.gather-runners-info.outputs.runners-info }}
+      dependencies: ${{ steps.gather-dependencies.outputs.dependencies }}
+
     steps:
       - name: Check out code
         uses: actions/checkout@v4
@@ -129,8 +122,8 @@ jobs:
           source .venv/bin/activate
 
           KERNELS=("softmax" "geglu" "swiglu" "jsd" "welford" "kl_div" "int4_gemm" "layer_norm" "layer_norm-bwd" "rms_norm" "rms_norm-bwd" "cross_entropy")
-          NUMSHARDS=${{ matrix.num_shards }}
-          SHARD=${{ matrix.shard }}
+          NUMSHARDS=${{ inputs.num-shards }}
+          SHARD=${{ inputs.shard }}
           SHARD_KERNELS=()
 
           for ((i=0; i<${#KERNELS[@]}; i++)); do
@@ -203,19 +196,41 @@ jobs:
           fi
           cat "$TEST_REPORTS_DIR/helionbench.json"
 
-      - name: Authenticate with AWS
-        uses: aws-actions/configure-aws-credentials@v4
+      - name: Gather benchmark metadata
+        id: gather-benchmark-metadata
+        uses: pytorch/test-infra/.github/actions/gather-benchmark-metadata@main
         with:
-          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
-          # The max duration enforced by the server side
-          role-duration-seconds: 18000
-          aws-region: us-east-1
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          venv: .venv/bin/activate
 
-      - name: Upload the benchmark results to OSS benchmark database for the dashboard
-        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+      - name: Gather runners info
+        id: gather-runners-info
+        uses: pytorch/test-infra/.github/actions/gather-runners-info@main
         with:
-          benchmark-results-dir: test/test-reports
-          dry-run: false
-          schema-version: v3
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          venv: ".venv/bin/activate"
+          venv: .venv/bin/activate
+
+      - name: Gather dependencies
+        id: gather-dependencies
+        uses: pytorch/test-infra/.github/actions/gather-dependencies@main
+        with:
+          venv: .venv/bin/activate
+
+      - name: Upload the benchmark results to GitHub
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-results-${{ inputs.alias }}-${{ inputs.shard }}
+          path: test/test-reports
+
+  upload-benchmark-results:
+    needs: benchmark
+    uses: pytorch/test-infra/.github/workflows/upload_benchmark_results.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      benchmark-artifact: benchmark-results-${{ inputs.alias }}-${{ inputs.shard }}
+      benchmark-metadata: ${{ needs.benchmark.outputs.benchmark-metadata }}
+      runners-info: ${{ needs.benchmark.outputs.runners-info }}
+      dependencies: ${{ needs.benchmark.outputs.dependencies }}
+      schema-version: v3
+      dry-run: false
diff --git a/.github/workflows/benchmark_dispatch.yml b/.github/workflows/benchmark_dispatch.yml
index 7f5be9d16..fa351adc2 100644
--- a/.github/workflows/benchmark_dispatch.yml
+++ b/.github/workflows/benchmark_dispatch.yml
@@ -22,9 +22,18 @@ on:
     - cron: '0 8 * * *'  # Runs at midnight PST (8 AM UTC)
 
 jobs:
-  run-h100:
+  gen-matrix-h100:
     if: ${{ github.event.inputs.run_h100 == 'true' || github.event_name == 'schedule' }}
+    uses: ./.github/workflows/compute-benchmark-matrix.yml
+    with:
+      max-runners: 12
+
+  run-h100:
+    needs: gen-matrix-h100
     uses: ./.github/workflows/benchmark.yml
+    strategy:
+      fail-fast: false  # a failing shard must not cancel the remaining shards
+      matrix: ${{ fromJSON(needs.gen-matrix-h100.outputs.matrix) }}
     permissions:
       id-token: write
       contents: read
@@ -35,11 +44,21 @@ jobs:
       runtime-version: cu129
       container-options: --gpus all
       alias: h100
+      num-shards: ${{ matrix.num_shards }}
+      shard: ${{ matrix.shard }}
+
+  gen-matrix-b200:
+    uses: ./.github/workflows/compute-benchmark-matrix.yml
+    if: ${{ github.event.inputs.run_b200 == 'true' || github.event_name == 'schedule' }}
+    with:
       max-runners: 12
 
   run-b200:
-    if: ${{ github.event.inputs.run_b200 == 'true' || github.event_name == 'schedule' }}
+    needs: gen-matrix-b200
     uses: ./.github/workflows/benchmark.yml
+    strategy:
+      fail-fast: false  # a failing shard must not cancel the remaining shards
+      matrix: ${{ fromJSON(needs.gen-matrix-b200.outputs.matrix) }}
     permissions:
       id-token: write
       contents: read
@@ -50,11 +69,21 @@ jobs:
       runtime-version: cu129
       container-options: --gpus all
       alias: b200
-      max-runners: 12
+      num-shards: ${{ matrix.num_shards }}
+      shard: ${{ matrix.shard }}
 
-  run-mi325x:
+  gen-matrix-mi325x:
+    uses: ./.github/workflows/compute-benchmark-matrix.yml
     if: ${{ github.event.inputs.run_mi325x == 'true' || github.event_name == 'schedule' }}
+    with:
+      max-runners: 6
+
+  run-mi325x:
+    needs: gen-matrix-mi325x
     uses: ./.github/workflows/benchmark.yml
+    strategy:
+      fail-fast: false  # a failing shard must not cancel the remaining shards
+      matrix: ${{ fromJSON(needs.gen-matrix-mi325x.outputs.matrix) }}
     permissions:
       id-token: write
       contents: read
@@ -65,4 +94,5 @@ jobs:
       runtime-version: rocm7.0
       container-options: --device=/dev/kfd --device=/dev/dri
       alias: mi325x
-      max-runners: 6
+      num-shards: ${{ matrix.num_shards }}
+      shard: ${{ matrix.shard }}
diff --git a/.github/workflows/compute-benchmark-matrix.yml b/.github/workflows/compute-benchmark-matrix.yml
new file mode 100644
index 000000000..160210814
--- /dev/null
+++ b/.github/workflows/compute-benchmark-matrix.yml
@@ -0,0 +1,24 @@
+name: Compute Benchmark Matrix
+
+on:
+  workflow_call:
+    inputs:
+      max-runners:
+        required: true
+        type: string
+    outputs:
+      matrix:
+        description: "The generated matrix for sharding"
+        value: ${{ jobs.gen.outputs.matrix }}
+
+jobs:
+  gen:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.gen.outputs.matrix }}
+    steps:
+      - id: gen
+        run: |
+          n="${{ inputs.max-runners }}"
+          shards=$(seq 0 $((n-1)) | paste -sd, -)
+          echo "matrix={\"shard\": [${shards}], \"num_shards\": [${n}]}" >> $GITHUB_OUTPUT