Skip to content

Commit

Permalink
[CI] Collect inductor max-autotune performance every Sunday
Browse files Browse the repository at this point in the history
ghstack-source-id: 2833907826b7d803988531ccbcab6c99563196a3
Pull Request resolved: #99387
  • Loading branch information
desertfire committed Apr 18, 2023
1 parent 7ff1f3f commit de6efcb
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 17 deletions.
37 changes: 21 additions & 16 deletions .ci/pytorch/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,10 @@ else
DYNAMO_BENCHMARK_FLAGS+=(--device cuda)
fi

# Turn on Inductor's max-autotune mode for test configs whose name
# contains "max_autotune" (e.g. inductor_huggingface_perf_max_autotune).
case "${TEST_CONFIG}" in
  *max_autotune*) export TORCHINDUCTOR_MAX_AUTOTUNE=1 ;;
esac

test_perf_for_dashboard() {
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
Expand All @@ -292,30 +296,31 @@ test_perf_for_dashboard() {
# Run accuracy test for inductor with different configs
# --disable-cudagraphs is the default inductor behavior
# TODO: update here once cudagraphs is turned on as default
python "benchmarks/dynamo/$suite.py" \
--accuracy --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_no_cudagraphs_${suite}_${dtype}_${mode}_cuda_accuracy.csv"
if [[ "${TEST_CONFIG}" != *max_autotune* ]]; then
python "benchmarks/dynamo/$suite.py" \
--accuracy --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_no_cudagraphs_${suite}_${dtype}_${mode}_cuda_accuracy.csv"
python "benchmarks/dynamo/$suite.py" \
--accuracy --"$mode" --"$dtype" --backend "$backend" --dynamic-shapes --dynamic-batch-only --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_${mode}_cuda_accuracy.csv"
# Only test this one config for max-autotune
python "benchmarks/dynamo/$suite.py" \
--accuracy --"$mode" --"$dtype" --backend "$backend" "$@" \
--output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_${mode}_cuda_accuracy.csv"
python "benchmarks/dynamo/$suite.py" \
--accuracy --"$mode" --"$dtype" --backend "$backend" --dynamic-shapes --dynamic-batch-only --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_${mode}_cuda_accuracy.csv"

# Run performance test
# Skip dynamo-eager and aot-eager for performance test
# Run performance test for inductor with different configs
# TODO: add more configs here, e.g. max-autotune, etc.
python "benchmarks/dynamo/$suite.py" \
--performance --cold-start-latency --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_no_cudagraphs_${suite}_${dtype}_${mode}_cuda_performance.csv"
if [[ "${TEST_CONFIG}" != *max_autotune* ]]; then
python "benchmarks/dynamo/$suite.py" \
--performance --cold-start-latency --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_no_cudagraphs_${suite}_${dtype}_${mode}_cuda_performance.csv"
python "benchmarks/dynamo/$suite.py" \
--performance --cold-start-latency --"$mode" --"$dtype" --backend "$backend" --dynamic-shapes \
--dynamic-batch-only --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_${mode}_cuda_performance.csv"
# Only test this one config for max-autotune
python "benchmarks/dynamo/$suite.py" \
--performance --cold-start-latency --"$mode" --"$dtype" --backend "$backend" "$@" \
--output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_${mode}_cuda_performance.csv"
python "benchmarks/dynamo/$suite.py" \
--performance --cold-start-latency --"$mode" --"$dtype" --backend "$backend" --dynamic-shapes \
--dynamic-batch-only --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_${mode}_cuda_performance.csv"
done
}

Expand Down
45 changes: 45 additions & 0 deletions .github/workflows/inductor-perf-max-autotune-weekly.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Weekly collection of inductor benchmark numbers with max-autotune enabled.
# The *_max_autotune test configs cause .ci/pytorch/test.sh to export
# TORCHINDUCTOR_MAX_AUTOTUNE=1 before running the dashboard benchmarks.
name: inductor-A100-max-autotune-weekly

on:
  schedule:
    # Every Sunday at 00:00 UTC; the nightly perf workflow covers Mon-Sat.
    - cron: 0 0 * * 0
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  linux-bionic-cuda11_8-py3_10-gcc7-inductor-build:
    name: cuda11.8-py3.10-gcc7-sm80
    uses: ./.github/workflows/_linux-build.yml
    with:
      build-environment: linux-bionic-cuda11.8-py3.10-gcc7-sm80
      docker-image-name: pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc7
      cuda-arch-list: '8.0'
      # Shard counts mirror the nightly inductor perf job; all shards run on
      # A100 runners since the benchmarks target sm80.
      test-matrix: |
        { include: [
          { config: "inductor_huggingface_perf_max_autotune", shard: 1, num_shards: 3, runner: "linux.gcp.a100.large" },
          { config: "inductor_huggingface_perf_max_autotune", shard: 2, num_shards: 3, runner: "linux.gcp.a100.large" },
          { config: "inductor_huggingface_perf_max_autotune", shard: 3, num_shards: 3, runner: "linux.gcp.a100.large" },
          { config: "inductor_timm_perf_max_autotune", shard: 1, num_shards: 6, runner: "linux.gcp.a100.large" },
          { config: "inductor_timm_perf_max_autotune", shard: 2, num_shards: 6, runner: "linux.gcp.a100.large" },
          { config: "inductor_timm_perf_max_autotune", shard: 3, num_shards: 6, runner: "linux.gcp.a100.large" },
          { config: "inductor_timm_perf_max_autotune", shard: 4, num_shards: 6, runner: "linux.gcp.a100.large" },
          { config: "inductor_timm_perf_max_autotune", shard: 5, num_shards: 6, runner: "linux.gcp.a100.large" },
          { config: "inductor_timm_perf_max_autotune", shard: 6, num_shards: 6, runner: "linux.gcp.a100.large" },
          { config: "inductor_torchbench_perf_max_autotune", shard: 1, num_shards: 3, runner: "linux.gcp.a100.large" },
          { config: "inductor_torchbench_perf_max_autotune", shard: 2, num_shards: 3, runner: "linux.gcp.a100.large" },
          { config: "inductor_torchbench_perf_max_autotune", shard: 3, num_shards: 3, runner: "linux.gcp.a100.large" },
        ]}

  linux-bionic-cuda11_8-py3_10-gcc7-inductor-test:
    name: cuda11.8-py3.10-gcc7-sm80
    uses: ./.github/workflows/_linux-test.yml
    needs: linux-bionic-cuda11_8-py3_10-gcc7-inductor-build
    with:
      build-environment: linux-bionic-cuda11.8-py3.10-gcc7-sm80
      docker-image: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-inductor-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-bionic-cuda11_8-py3_10-gcc7-inductor-build.outputs.test-matrix }}
      use-gha: anything-non-empty-to-use-gha
      # Max-autotune compilation is slow; allow up to 12 hours.
      timeout-minutes: 720
2 changes: 1 addition & 1 deletion .github/workflows/inductor-perf-test-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: inductor-A100-perf-nightly

on:
schedule:
- cron: 45 1,13 * * *
- cron: 45 1,13 * * 1-6
workflow_dispatch:

concurrency:
Expand Down
2 changes: 2 additions & 0 deletions benchmarks/dynamo/torchbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ def setup_torchbench_cwd():
"fambench_xlmr",
# https://github.com/pytorch/pytorch/issues/99201
"opacus_cifar10",
# TIMEOUT, https://github.com/pytorch/pytorch/issues/98467
"tacotron2",
}

SKIP_FOR_CUDA = {
Expand Down

0 comments on commit de6efcb

Please sign in to comment.