[tritonbench] Add initial tritonbench benchmark config #110
.github/scripts/generate_tritonbench_matrix.py
@@ -0,0 +1,108 @@
#!/usr/bin/env python

import json
import logging
import os
from argparse import ArgumentParser
from typing import Any, Dict, List

logging.basicConfig(level=logging.INFO)

# This mapping is needed to find out the platform of the runner
RUNNER_TO_PLATFORM_MAPPING = {
    "linux.dgx.b200": "cuda",
}

# TritonBench benchmarks
TRITONBENCH_BENCHMARKS = {
    "nightly",
}


def set_output(name: str, val: Any) -> None:
    """
    Set the output value to be used by other GitHub jobs.

    Args:
        name (str): The name of the output variable.
        val (Any): The value to set for the output variable.

    Example:
        set_output("benchmark_matrix", {"include": [...]})
    """
    github_output = os.getenv("GITHUB_OUTPUT")

    if not github_output:
        # Fall back to the deprecated workflow command, e.g. when run locally
        print(f"::set-output name={name}::{val}")
        return

    with open(github_output, "a") as env:
        env.write(f"{name}={val}\n")


def parse_args() -> Any:
    parser = ArgumentParser("Generate TritonBench benchmark CI matrix")

    parser.add_argument(
        "--benchmarks",
        type=str,
        default="nightly",
        help="the comma-separated list of benchmarks to run. Defaults to nightly.",
    )
    parser.add_argument(
        "--runners",
        type=str,
        default="",
        help="the comma-separated list of runners to run the benchmark. Required.",
        required=True,
    )

    return parser.parse_args()


def generate_benchmark_matrix(
    benchmarks: List[str], runners: List[str]
) -> Dict[str, Any]:
    benchmark_matrix: Dict[str, Any] = {
        "include": [],
    }
    if not runners:
        runners = list(RUNNER_TO_PLATFORM_MAPPING.keys())
    else:
        # Treat each runner argument as a substring of the full runner name,
        # e.g. "b200" selects "linux.dgx.b200"
        runner_args = runners.copy()
        runners = []
        for k in RUNNER_TO_PLATFORM_MAPPING:
            for r in runner_args:
                if r.lower() in k and k not in runners:
                    runners.append(k)

    if not benchmarks:
        benchmarks = sorted(TRITONBENCH_BENCHMARKS)

    # Gather all possible benchmarks
    for runner in runners:
        for benchmark in benchmarks:
            benchmark_matrix["include"].append(
                {
                    "runner": runner,
                    # Return a comma-separated list of benchmarks here so that
                    # multiple benchmarks could run on the same runner
                    "benchmarks": benchmark,
                }
            )

    return benchmark_matrix


def main() -> None:
    args = parse_args()
    benchmarks = [b.strip().lower() for b in args.benchmarks.split(",") if b.strip()]
    runners = [r.strip().lower() for r in args.runners.split(",") if r.strip()]
    benchmark_matrix = generate_benchmark_matrix(benchmarks, runners)
    print(benchmark_matrix)
    # Serialize to JSON so that fromJson() in the workflow can parse the output
    set_output("benchmark_matrix", json.dumps(benchmark_matrix))


if __name__ == "__main__":
    main()
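For orientation, here is a minimal sketch of how the two pieces above fit together when run locally. This is not part of the PR: the temporary file merely stands in for the GITHUB_OUTPUT file that a real runner provides, and the import assumes the script is on the Python path.

# Local sanity check of the matrix generation and output plumbing (a sketch)
import json
import os
import tempfile

from generate_tritonbench_matrix import generate_benchmark_matrix, set_output

# "b200" is matched as a substring of the known runner "linux.dgx.b200"
matrix = generate_benchmark_matrix(benchmarks=["nightly"], runners=["b200"])
assert matrix == {"include": [{"runner": "linux.dgx.b200", "benchmarks": "nightly"}]}

with tempfile.NamedTemporaryFile("r+") as f:
    # Stand-in for the GITHUB_OUTPUT file on a real runner
    os.environ["GITHUB_OUTPUT"] = f.name
    set_output("benchmark_matrix", json.dumps(matrix))
    f.seek(0)
    print(f.read())  # benchmark_matrix={"include": [...]}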
@@ -0,0 +1,26 @@
import json

from expecttest import assert_expected_inline
from generate_tritonbench_matrix import generate_benchmark_matrix


def test_generate_benchmark_matrix():
    # All combinations, no duplication
    benchmarks = []
    runners = []
    output = json.dumps(generate_benchmark_matrix(benchmarks, runners), indent=2)
    assert_expected_inline(
        output,
        """\
{
  "include": [
    {
      "runner": "linux.dgx.b200",
      "benchmarks": "nightly"
    }
  ]
}""",
    )
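The PR only pins the default, all-combinations case. A second case along the following lines (a sketch, not part of the diff) would also pin the substring matching of runner arguments against RUNNER_TO_PLATFORM_MAPPING:

def test_generate_benchmark_matrix_runner_filter():
    # Hypothetical extra case: "b200" selects "linux.dgx.b200" by substring match
    benchmarks = ["nightly"]
    runners = ["b200"]
    output = json.dumps(generate_benchmark_matrix(benchmarks, runners), indent=2)
    assert_expected_inline(
        output,
        """\
{
  "include": [
    {
      "runner": "linux.dgx.b200",
      "benchmarks": "nightly"
    }
  ]
}""",
    )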
@@ -0,0 +1,222 @@
name: TritonBench

on:
  schedule:
    # Run every 12 hours
    - cron: '0 */12 * * *'
  workflow_dispatch:
    inputs:
      tritonbench_branch:
        description: TritonBench branch (main)
        required: true
        type: string
        default: main
      benchmarks:
        description: |
          A comma-separated list of benchmarks from tritonbench/benchmarks (optional, defaults to nightly)
        required: false
        type: string
      runners:
        description: |
          A comma-separated list of runners from .github/scripts/generate_tritonbench_matrix.py to run the benchmark (optional, defaults to b200)
        required: true
        type: string
        default: b200

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  set-parameters:
    runs-on: ubuntu-latest
    outputs:
      benchmark_matrix: ${{ steps.set-parameters.outputs.benchmark_matrix }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Set parameters
        id: set-parameters
        shell: bash
        env:
          BENCHMARKS: ${{ inputs.benchmarks || '' }}
          RUNNERS: ${{ inputs.runners || '' }}
        run: |
          set -eux

          # The generated matrix is grouped by benchmark and runner
          python .github/scripts/generate_tritonbench_matrix.py \
            --benchmarks "${BENCHMARKS}" \
            --runners "${RUNNERS}"

  benchmarks:
    name: Run TritonBench benchmarks
    needs: set-parameters
    if: ${{ !github.event.pull_request.head.repo.fork && github.repository_owner == 'pytorch' }}
    strategy:
      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    environment: pytorch-x-vllm
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install system dependencies
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -y libnuma-dev numactl

      - name: Checkout TritonBench repository
        uses: actions/checkout@v4
        with:
          repository: meta-pytorch/tritonbench
          path: triton-benchmarks/tritonbench
          ref: ${{ inputs.tritonbench_branch || 'main' }}
          fetch-depth: 0

      - uses: actions/setup-python@v5
        # Amazon Linux fails on this step
        continue-on-error: true
        with:
          python-version: '3.12'
          cache: 'pip'

      - name: Check if the device is supported
        shell: bash
        run: |
          set -eux

          if command -v nvidia-smi; then
            DEVICE_NAME=cuda
            nvidia-smi
          elif command -v rocm-smi; then
            DEVICE_NAME=rocm
            rocm-smi
          else
            DEVICE_NAME=cpu
            lscpu
          fi
          echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV

      - name: Set GPU name and type
        shell: bash
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
            CUDA_HOME="/usr/local/cuda"
            echo "CUDA_HOME=$CUDA_HOME" >> $GITHUB_ENV
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DEVICE_TYPE=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ")
          fi
          echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV

      - name: Setup CUDA GPU_FLAG for docker run
        if: env.DEVICE_NAME == 'cuda'
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Select TritonBench Docker image
        shell: bash
        run: |
          set -eux

          # Determine image suffix based on device
          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            IMAGE_SUFFIX="latest"
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            IMAGE_SUFFIX="rocm-latest"
          else
            echo "TritonBench requires either CUDA or ROCm devices."
            exit 1
          fi

          DOCKER_IMAGE="meta-pytorch/tritonbench:${IMAGE_SUFFIX}"
          CONDA_ENV="triton-main"
          echo "DOCKER_IMAGE=$DOCKER_IMAGE" >> "$GITHUB_ENV"
          echo "CONDA_ENV=$CONDA_ENV" >> "$GITHUB_ENV"
          echo "Using docker image: $DOCKER_IMAGE"
          echo "Using conda env: $CONDA_ENV"

      - name: Run TritonBench benchmark
        run: |
          set -eux

          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e DEVICE_NAME \
            -e DEVICE_TYPE \
            -e CONDA_ENV \
            --ipc=host \
            --tty \
            --detach \
            --security-opt seccomp=unconfined \
            --shm-size=32g \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}"
          )
          # Export the container ID so that the cleanup step can kill it later
          echo "TRITONBENCH_CONTAINER_ID=${container_name}" >> "${GITHUB_ENV}"

          docker exec -t -w /tmp/workspace "${container_name}" bash -c " \
            set -eux && cd /workspace/tritonbench && \
            bash .ci/tritonbench/run-benchmark.sh ${{ matrix.benchmarks }} --conda-env ${{ env.CONDA_ENV }}"

          # Move the results into the checked-out repository on the host
          docker exec -t -w /tmp/workspace "${container_name}" bash -c " \
            set -eux && cd /workspace/tritonbench && mv .benchmarks /tmp/workspace/triton-benchmarks/tritonbench/results"

      - name: Authenticate with AWS
        # AWS CUDA runners already have access to the bucket via their runner IAM role
        if: env.DEVICE_NAME == 'rocm' || contains(env.DEVICE_TYPE, 'B200')
        uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
        with:
          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
          # The max duration enforced by the server side
          role-duration-seconds: 18000
          aws-region: us-east-1

      # Keep a copy of the benchmark results on GitHub for reference
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: tritonbench-results
          path: triton-benchmarks/tritonbench/results
          retention-days: 30

      - name: Upload result to Scribe
        working-directory: triton-benchmarks/tritonbench
        run: |
          latest_result_json=$(find ./results -name "result.json" | sort -r | head -n 1)
          python3 ./.ci/upload/scribe.py --json ${latest_result_json}
Contributor: Are there any permissions that we need to set up to make the Scribe upload work here?

Contributor: Ah, let me know if you need to set it up.

Contributor (author): Yes, we will need to set it up.
      - name: Rewrite TritonBench result json to ClickHouse style
        working-directory: triton-benchmarks/tritonbench
        run: |
          latest_result_json=$(find ./results -name "result.json" | sort -r | head -n 1)
          python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \
            --output clickhouse-results/result.json

      - name: Upload result to ClickHouse
        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
        with:
          benchmark-results-dir: triton-benchmarks/tritonbench/clickhouse-results
          dry-run: false
          schema-version: v3
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Kill the container
        if: always()
        run: |
          docker kill "${TRITONBENCH_CONTAINER_ID}" || true
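As a rough illustration of how the two upload steps locate the newest result file, here is the `find ./results -name "result.json" | sort -r | head -n 1` logic in Python. The path is the one the benchmark step moves results into; this is a sketch for readers, not part of the PR.

# Sketch of how the upload steps pick the newest result.json
from pathlib import Path

results_dir = Path("triton-benchmarks/tritonbench/results")
# Reverse lexicographic sort mirrors `sort -r | head -n 1` in the workflow
candidates = sorted((str(p) for p in results_dir.rglob("result.json")), reverse=True)
latest_result_json = candidates[0] if candidates else None
print(latest_result_json)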
Contributor: Just FYI, B200 runners are offline at the moment waiting for NVIDIA to re-image them, so it would take a few more days for them to be back in CI. The tracking issue is pytorch/pytorch#169386.