108 changes: 108 additions & 0 deletions .github/scripts/generate_tritonbench_matrix.py
@@ -0,0 +1,108 @@
#!/usr/bin/env python

import glob
import json
import logging
import os
from argparse import Action, ArgumentParser, Namespace
from logging import warning
from typing import Any, Dict, List, Optional


logging.basicConfig(level=logging.INFO)

# This mapping resolves each runner to its hardware platform
RUNNER_TO_PLATFORM_MAPPING = {
    "linux.dgx.b200": "cuda",
Contributor:
Just FYI, B200 runners are offline at the moment waiting for NVIDIA to re-image them, so it would take a few more days for them to be back to CI. The tracking issue is here: pytorch/pytorch#169386
}

# TritonBench benchmarks
TRITONBENCH_BENCHMARKS = set(
    [
        "nightly",
    ]
)

def set_output(name: str, val: Any) -> None:
    """
    Set the output value to be used by other GitHub jobs.

    Args:
        name (str): The name of the output variable.
        val (Any): The value to set for the output variable.

    Example:
        set_output("benchmark_matrix", {"include": [...]})
    """
    github_output = os.getenv("GITHUB_OUTPUT")

    if not github_output:
        print(f"::set-output name={name}::{val}")
        return

    with open(github_output, "a") as env:
        env.write(f"{name}={val}\n")

def parse_args() -> Any:
    parser = ArgumentParser("Generate TritonBench benchmark CI matrix")

    parser.add_argument(
        "--benchmarks",
        type=str,
        default="nightly",
        help="the comma-separated list of benchmarks to run. Defaults to nightly.",
    )
    parser.add_argument(
        "--runners",
        type=str,
        default="",
        help="the comma-separated list of runners to run the benchmarks on. Required.",
        required=True,
    )

    return parser.parse_args()

def generate_benchmark_matrix(benchmarks: List[str], runners: List[str]) -> Dict[str, Any]:
    benchmark_matrix: Dict[str, Any] = {
        "include": [],
    }
    if not runners:
        runners = list(RUNNER_TO_PLATFORM_MAPPING.keys())
    else:
        runner_args = runners.copy()
        runners = []
        for k in RUNNER_TO_PLATFORM_MAPPING:
            for r in runner_args:
                if r.lower() in k:
                    runners.append(k)

    if not benchmarks:
        benchmarks = list(TRITONBENCH_BENCHMARKS)

    # Gather all possible benchmark and runner combinations
    for runner in runners:
        for benchmark in benchmarks:
            benchmark_matrix["include"].append(
                {
                    "runner": runner,
                    # Keep this as a comma-separated list of benchmarks so that
                    # multiple benchmarks could run on the same runner
                    "benchmarks": benchmark,
                }
            )

    return benchmark_matrix


def main() -> None:
    args = parse_args()
    benchmarks = [b.strip().lower() for b in args.benchmarks.split(",") if b.strip()]
    runners = [r.strip().lower() for r in args.runners.split(",") if r.strip()]
    benchmark_matrix = generate_benchmark_matrix(benchmarks, runners)

    # Serialize to JSON so that the workflow can consume the output with fromJson()
    print(json.dumps(benchmark_matrix))
    set_output("benchmark_matrix", json.dumps(benchmark_matrix))


if __name__ == "__main__":
    main()
26 changes: 26 additions & 0 deletions .github/scripts/test_generate_tritonbench_matrix.py
@@ -0,0 +1,26 @@
import os
import json

from expecttest import assert_expected_inline
from generate_tritonbench_matrix import generate_benchmark_matrix


def test_generate_benchmark_matrix():
    # All combinations, no duplication
    benchmarks = []
    runners = []
    output = json.dumps(
        generate_benchmark_matrix(benchmarks, runners), indent=2
    )
    assert_expected_inline(
        output,
        """\
{
  "include": [
    {
      "runner": "linux.dgx.b200",
      "benchmarks": "nightly"
    }
  ]
}""",
    )
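
A follow-up test one might add here (a sketch in the same expecttest style; the test name is hypothetical) to cover the runner-filtering branch of generate_benchmark_matrix, where a substring such as "b200" is expanded to the full runner name from RUNNER_TO_PLATFORM_MAPPING:

def test_generate_benchmark_matrix_runner_filter():
    # Hypothetical extra case: a runner substring should be expanded to the
    # full runner name, and the default nightly benchmark should be filled in
    benchmarks = []
    runners = ["b200"]
    output = json.dumps(
        generate_benchmark_matrix(benchmarks, runners), indent=2
    )
    assert_expected_inline(
        output,
        """\
{
  "include": [
    {
      "runner": "linux.dgx.b200",
      "benchmarks": "nightly"
    }
  ]
}""",
    )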
222 changes: 222 additions & 0 deletions .github/workflows/tritonbench.yml
@@ -0,0 +1,222 @@
name: TritonBench

on:
  schedule:
    # Run every 12 hours
    - cron: '0 */12 * * *'
  workflow_dispatch:
    inputs:
      tritonbench_branch:
        description: TritonBench branch (main)
        required: true
        type: string
        default: main
      benchmarks:
        description: |
          A comma-separated list of benchmarks from tritonbench/benchmarks (optional, defaults to nightly)
        required: false
        type: string
      runners:
        description: |
          A comma-separated list of runners from .github/scripts/generate_tritonbench_matrix.py to run the benchmarks on (defaults to b200)
        required: true
        type: string
        default: b200

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true


jobs:
  set-parameters:
    runs-on: ubuntu-latest
    outputs:
      benchmark_matrix: ${{ steps.set-parameters.outputs.benchmark_matrix }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Set parameters
        id: set-parameters
        shell: bash
        env:
          BENCHMARKS: ${{ inputs.benchmarks || '' }}
          RUNNERS: ${{ inputs.runners || '' }}
        run: |
          set -eux

          # The generated matrix is grouped by benchmark and runner
          python .github/scripts/generate_tritonbench_matrix.py \
            --benchmarks "${BENCHMARKS}" \
            --runners "${RUNNERS}"


  benchmarks:
    name: Run TritonBench benchmarks
    needs: set-parameters
    if: ${{ !github.event.pull_request.head.repo.fork && github.repository_owner == 'pytorch' }}
    strategy:
      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    environment: pytorch-x-vllm
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install system dependencies
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -y libnuma-dev numactl

      - name: Checkout TritonBench repository
        uses: actions/checkout@v4
        with:
          repository: meta-pytorch/tritonbench
          path: triton-benchmarks/tritonbench
          ref: ${{ inputs.tritonbench_branch || 'main' }}
          fetch-depth: 0

      - uses: actions/setup-python@v5
        # Amazon Linux fails on this step
        continue-on-error: true
        with:
          python-version: '3.12'
          cache: 'pip'

      - name: Check if the device is supported
        shell: bash
        run: |
          set -eux

          if command -v nvidia-smi; then
            DEVICE_NAME=cuda
            nvidia-smi
          elif command -v rocm-smi; then
            DEVICE_NAME=rocm
            rocm-smi
          else
            DEVICE_NAME=cpu
            lscpu
          fi
          echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV

      - name: Set GPU name and type
        shell: bash
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
            CUDA_HOME="/usr/local/cuda"
            echo "CUDA_HOME=$CUDA_HOME" >> $GITHUB_ENV
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DEVICE_TYPE=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ")
          fi
          echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV

      - name: Setup CUDA GPU_FLAG for docker run
        if: env.DEVICE_NAME == 'cuda'
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Select TritonBench Docker image
        shell: bash
        run: |
          set -eux
          # Determine image suffix based on device
          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            IMAGE_SUFFIX="latest"
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            IMAGE_SUFFIX="rocm-latest"
          else
            echo "TritonBench requires either CUDA or ROCm devices."
            exit 1
          fi

          DOCKER_IMAGE="meta-pytorch/tritonbench:${IMAGE_SUFFIX}"
          # Set CONDA_ENV as a shell variable first; GITHUB_ENV only applies to
          # later steps, and echoing an unset variable would fail under set -u
          CONDA_ENV="triton-main"

          echo "DOCKER_IMAGE=$DOCKER_IMAGE" >> "$GITHUB_ENV"
          echo "CONDA_ENV=$CONDA_ENV" >> "$GITHUB_ENV"
          echo "Using docker image: $DOCKER_IMAGE"
          echo "Using conda env: $CONDA_ENV"

      - name: Run TritonBench benchmark
        run: |
          set -eux

          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e DEVICE_NAME \
            -e DEVICE_TYPE \
            -e CONDA_ENV \
            --ipc=host \
            --tty \
            --detach \
            --security-opt seccomp=unconfined \
            --shm-size=32g \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}"
          )
          # Save the container ID so that the cleanup step can kill it later
          echo "TRITONBENCH_CONTAINER_ID=${container_name}" >> "${GITHUB_ENV}"

          docker exec -t -w /tmp/workspace "${container_name}" bash -c " \
            set -eux && cd /workspace/tritonbench && \
            bash .ci/tritonbench/run-benchmark.sh ${{ matrix.benchmarks }} --conda-env ${{ env.CONDA_ENV }}"

          docker exec -t -w /tmp/workspace "${container_name}" bash -c " \
            set -eux && cd /workspace/tritonbench && \
            mv .benchmarks /tmp/workspace/triton-benchmarks/tritonbench/results"

      - name: Authenticate with AWS
        # AWS CUDA runners already have access to the bucket via their runner IAM role
        if: env.DEVICE_NAME == 'rocm' || contains(env.DEVICE_TYPE, 'B200')
        uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
        with:
          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
          # The max duration enforced by the server side
          role-duration-seconds: 18000
          aws-region: us-east-1

      # Keep a copy of the benchmark results on GitHub for reference
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: tritonbench-results
          path: triton-benchmarks/tritonbench/results
          retention-days: 30

      - name: Upload result to Scribe
        working-directory: triton-benchmarks/tritonbench
        run: |
          latest_result_json=$(find ./results -name "result.json" | sort -r | head -n 1)
          python3 ./.ci/upload/scribe.py --json ${latest_result_json}
Contributor:
Any permission that we need to set up to make the Scribe upload work here?

Contributor:
Ah, let me know if you need to set up TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN, not sure if that can be done self-serve.

Contributor (Author):
Yes, we will need to set up TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN as a GitHub secret.
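
Since the Scribe upload depends on that token being present, one option is a small pre-flight guard before calling scribe.py. The following is a hypothetical sketch, not part of the PR: the secret name follows the discussion above, and it would still need to be exposed to the step via an env entry referencing secrets.TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN.

#!/usr/bin/env python3
# check_scribe_token.py -- hypothetical pre-flight check, not part of this PR.
# Fails fast with a workflow error annotation if the Scribe access token has
# not been exposed to the job environment.
import os
import sys

TOKEN_ENV_VAR = "TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN"


def main() -> None:
    token = os.getenv(TOKEN_ENV_VAR, "")
    if not token:
        # ::error:: makes the message show up as a GitHub Actions annotation
        print(
            f"::error::{TOKEN_ENV_VAR} is not set; configure it as a GitHub "
            "secret and pass it to this step via env"
        )
        sys.exit(1)
    print(f"{TOKEN_ENV_VAR} is set ({len(token)} characters)")


if __name__ == "__main__":
    main()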


      - name: Rewrite TritonBench result json to ClickHouse style
        working-directory: triton-benchmarks/tritonbench
        run: |
          latest_result_json=$(find ./results -name "result.json" | sort -r | head -n 1)
          python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \
            --output clickhouse-results/result.json

      - name: Upload result to ClickHouse
        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
        with:
          benchmark-results-dir: triton-benchmarks/tritonbench/clickhouse-results
          dry-run: false
          schema-version: v3
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Kill the container
        if: always()
        run: |
          docker kill "${TRITONBENCH_CONTAINER_ID}" || true