108 changes: 108 additions & 0 deletions .github/scripts/generate_tritonbench_matrix.py
@@ -0,0 +1,108 @@
#!/usr/bin/env python

import glob
import json
import logging
import os
from argparse import Action, ArgumentParser, Namespace
from logging import warning
from typing import Any, Dict, List, Optional


logging.basicConfig(level=logging.INFO)

# This mapping resolves each runner to its hardware platform
RUNNER_TO_PLATFORM_MAPPING = {
    "linux.dgx.b200": "cuda",
Contributor:
Just FYI, B200 runners are offline at the moment waiting for NVIDIA to re-image them, so it would take a few more days for them to be back to CI. The tracking issue is here: pytorch/pytorch#169386
}

# TritonBench benchmarks
TRITONBENCH_BENCHMARKS = set(
    [
        "nightly",
    ]
)

def set_output(name: str, val: Any) -> None:
    """
    Set the output value to be used by other GitHub jobs.

    Args:
        name (str): The name of the output variable.
        val (Any): The value to set for the output variable.

    Example:
        set_output("benchmark_matrix", {"include": [...]})
    """
    github_output = os.getenv("GITHUB_OUTPUT")

    if not github_output:
        print(f"::set-output name={name}::{val}")
        return

    with open(github_output, "a") as env:
        env.write(f"{name}={val}\n")

def parse_args() -> Any:
    parser = ArgumentParser("Generate TritonBench benchmark CI matrix")

    parser.add_argument(
        "--benchmarks",
        type=str,
        default="nightly",
        help="the comma-separated list of benchmarks to run. Defaults to nightly.",
    )
    parser.add_argument(
        "--runners",
        type=str,
        default="",
        help="the comma-separated list of runners to run the benchmarks on. Required.",
        required=True,
    )

    return parser.parse_args()

def generate_benchmark_matrix(benchmarks: List[str], runners: List[str]) -> Dict[str, Any]:
    benchmark_matrix: Dict[str, Any] = {
        "include": [],
    }
    if not runners:
        runners = list(RUNNER_TO_PLATFORM_MAPPING.keys())
    else:
        runner_args = runners.copy()
        runners = []
        for k in RUNNER_TO_PLATFORM_MAPPING:
            for r in runner_args:
                if r.lower() in k:
                    runners.append(k)

    if not benchmarks:
        benchmarks = list(TRITONBENCH_BENCHMARKS)

    # Gather all possible benchmark and runner combinations
    for runner in runners:
        for benchmark in benchmarks:
            benchmark_matrix["include"].append(
                {
                    "runner": runner,
                    # Keep this as a comma-separated list of benchmarks so that
                    # multiple benchmarks could run on the same runner
                    "benchmarks": benchmark,
                }
            )

    return benchmark_matrix


def main() -> None:
    args = parse_args()
    benchmarks = [b.strip().lower() for b in args.benchmarks.split(",") if b.strip()]
    runners = [r.strip().lower() for r in args.runners.split(",") if r.strip()]
    benchmark_matrix = generate_benchmark_matrix(benchmarks, runners)

    # Serialize to JSON so that the workflow can consume the output with fromJson()
    print(json.dumps(benchmark_matrix))
    set_output("benchmark_matrix", json.dumps(benchmark_matrix))


if __name__ == "__main__":
    main()
26 changes: 26 additions & 0 deletions .github/scripts/test_generate_tritonbench_matrix.py
@@ -0,0 +1,26 @@
import os
import json

from expecttest import assert_expected_inline
from generate_tritonbench_matrix import generate_benchmark_matrix


def test_generate_benchmark_matrix():
    # All combinations, no duplication
    benchmarks = []
    runners = []
    output = json.dumps(
        generate_benchmark_matrix(benchmarks, runners), indent=2
    )
    assert_expected_inline(
        output,
        """\
{
  "include": [
    {
      "runner": "linux.dgx.b200",
      "benchmarks": "nightly"
    }
  ]
}""",
    )
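
A follow-up test one might add here (a sketch in the same expecttest style; the test name is hypothetical) to cover the runner-filtering branch of generate_benchmark_matrix, where a substring such as "b200" is expanded to the full runner name from RUNNER_TO_PLATFORM_MAPPING:

def test_generate_benchmark_matrix_runner_filter():
    # Hypothetical extra case: a runner substring should be expanded to the
    # full runner name, and the default nightly benchmark should be filled in
    benchmarks = []
    runners = ["b200"]
    output = json.dumps(
        generate_benchmark_matrix(benchmarks, runners), indent=2
    )
    assert_expected_inline(
        output,
        """\
{
  "include": [
    {
      "runner": "linux.dgx.b200",
      "benchmarks": "nightly"
    }
  ]
}""",
    )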
222 changes: 222 additions & 0 deletions .github/workflows/tritonbench.yml
@@ -0,0 +1,222 @@
name: TritonBench

on:
  schedule:
    # Run every 12 hours
    - cron: '0 */12 * * *'
  workflow_dispatch:
    inputs:
      tritonbench_branch:
        description: TritonBench branch (main)
        required: true
        type: string
        default: main
      benchmarks:
        description: |
          A comma-separated list of benchmarks from tritonbench/benchmarks (optional, defaults to nightly)
        required: false
        type: string
      runners:
        description: |
          A comma-separated list of runners from .github/scripts/generate_tritonbench_matrix.py to run the benchmarks on (defaults to b200)
        required: true
        type: string
        default: b200

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true


jobs:
  set-parameters:
    runs-on: ubuntu-latest
    outputs:
      benchmark_matrix: ${{ steps.set-parameters.outputs.benchmark_matrix }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Set parameters
        id: set-parameters
        shell: bash
        env:
          BENCHMARKS: ${{ inputs.benchmarks || '' }}
          RUNNERS: ${{ inputs.runners || '' }}
        run: |
          set -eux

          # The generated matrix is grouped by benchmark and runner
          python .github/scripts/generate_tritonbench_matrix.py \
            --benchmarks "${BENCHMARKS}" \
            --runners "${RUNNERS}"


  benchmarks:
    name: Run TritonBench benchmarks
    needs: set-parameters
    if: ${{ !github.event.pull_request.head.repo.fork && github.repository_owner == 'pytorch' }}
    strategy:
      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    environment: pytorch-x-vllm
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install system dependencies
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -y libnuma-dev numactl

      - name: Checkout TritonBench repository
        uses: actions/checkout@v4
        with:
          repository: meta-pytorch/tritonbench
          path: triton-benchmarks/tritonbench
          ref: ${{ inputs.tritonbench_branch || 'main' }}
          fetch-depth: 0

      - uses: actions/setup-python@v5
        # Amazon Linux fails on this step
        continue-on-error: true
        with:
          python-version: '3.12'
          cache: 'pip'

      - name: Check if the device is supported
        shell: bash
        run: |
          set -eux

          if command -v nvidia-smi; then
            DEVICE_NAME=cuda
            nvidia-smi
          elif command -v rocm-smi; then
            DEVICE_NAME=rocm
            rocm-smi
          else
            DEVICE_NAME=cpu
            lscpu
          fi
          echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV

      - name: Set GPU name and type
        shell: bash
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
            CUDA_HOME="/usr/local/cuda"
            echo "CUDA_HOME=$CUDA_HOME" >> $GITHUB_ENV
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DEVICE_TYPE=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ")
          fi
          echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV

      - name: Setup CUDA GPU_FLAG for docker run
        if: env.DEVICE_NAME == 'cuda'
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Select TritonBench Docker image
        shell: bash
        run: |
          set -eux
          # Determine image suffix based on device
          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            IMAGE_SUFFIX="latest"
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            IMAGE_SUFFIX="rocm-latest"
          else
            echo "TritonBench requires either CUDA or ROCm devices."
            exit 1
          fi

          DOCKER_IMAGE="meta-pytorch/tritonbench:${IMAGE_SUFFIX}"
          # Set CONDA_ENV as a shell variable first; GITHUB_ENV only applies to
          # later steps, and echoing an unset variable would fail under set -u
          CONDA_ENV="triton-main"

          echo "DOCKER_IMAGE=$DOCKER_IMAGE" >> "$GITHUB_ENV"
          echo "CONDA_ENV=$CONDA_ENV" >> "$GITHUB_ENV"
          echo "Using docker image: $DOCKER_IMAGE"
          echo "Using conda env: $CONDA_ENV"

      - name: Run TritonBench benchmark
        run: |
          set -eux

          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e DEVICE_NAME \
            -e DEVICE_TYPE \
            -e CONDA_ENV \
            --ipc=host \
            --tty \
            --detach \
            --security-opt seccomp=unconfined \
            --shm-size=32g \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}"
          )
          # Save the container ID so that the cleanup step can kill it later
          echo "TRITONBENCH_CONTAINER_ID=${container_name}" >> "${GITHUB_ENV}"

          docker exec -t -w /tmp/workspace "${container_name}" bash -c " \
            set -eux && cd /workspace/tritonbench && \
            bash .ci/tritonbench/run-benchmark.sh ${{ matrix.benchmarks }} --conda-env ${{ env.CONDA_ENV }}"

          docker exec -t -w /tmp/workspace "${container_name}" bash -c " \
            set -eux && cd /workspace/tritonbench && \
            mv .benchmarks /tmp/workspace/triton-benchmarks/tritonbench/results"

      - name: Authenticate with AWS
        # AWS CUDA runners already have access to the bucket via their runner IAM role
        if: env.DEVICE_NAME == 'rocm' || contains(env.DEVICE_TYPE, 'B200')
        uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
        with:
          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
          # The max duration enforced by the server side
          role-duration-seconds: 18000
          aws-region: us-east-1

      # Keep a copy of the benchmark results on GitHub for reference
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: tritonbench-results
          path: triton-benchmarks/tritonbench/results
          retention-days: 30

      - name: Upload result to Scribe
        working-directory: triton-benchmarks/tritonbench
        run: |
          latest_result_json=$(find ./results -name "result.json" | sort -r | head -n 1)
          python3 ./.ci/upload/scribe.py --json ${latest_result_json}
Contributor:
Any permission that we need to set up to make the Scribe upload work here?

Contributor:
Ah, let me know if you need to set up TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN, not sure if that can be done self-serve.

Contributor (Author):
Yes, we will need to set up TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN as a GitHub secret.
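
Since the Scribe upload depends on that token being present, one option is a small pre-flight guard before calling scribe.py. The following is a hypothetical sketch, not part of the PR: the secret name follows the discussion above, and it would still need to be exposed to the step via an env entry referencing secrets.TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN.

#!/usr/bin/env python3
# check_scribe_token.py -- hypothetical pre-flight check, not part of this PR.
# Fails fast with a workflow error annotation if the Scribe access token has
# not been exposed to the job environment.
import os
import sys

TOKEN_ENV_VAR = "TRITONBENCH_SCRIBE_GRAPHQL_ACCESS_TOKEN"


def main() -> None:
    token = os.getenv(TOKEN_ENV_VAR, "")
    if not token:
        # ::error:: makes the message show up as a GitHub Actions annotation
        print(
            f"::error::{TOKEN_ENV_VAR} is not set; configure it as a GitHub "
            "secret and pass it to this step via env"
        )
        sys.exit(1)
    print(f"{TOKEN_ENV_VAR} is set ({len(token)} characters)")


if __name__ == "__main__":
    main()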


      - name: Rewrite TritonBench result json to ClickHouse style
        working-directory: triton-benchmarks/tritonbench
        run: |
          latest_result_json=$(find ./results -name "result.json" | sort -r | head -n 1)
          python3 ./.ci/test_infra/oss_ci_benchmark_v3.py --json ${latest_result_json} \
            --output clickhouse-results/result.json

      - name: Upload result to ClickHouse
        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
        with:
          benchmark-results-dir: triton-benchmarks/tritonbench/clickhouse-results
          dry-run: false
          schema-version: v3
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Kill the container
        if: always()
        run: |
          docker kill "${TRITONBENCH_CONTAINER_ID}" || true