From 05c47c4e235db004d59bf53d0f33ff751a1d3c02 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Wed, 26 Jun 2024 10:40:29 -0400 Subject: [PATCH 1/9] Install torchao --- .github/workflows/_linux-benchmark-cuda.yml | 104 ++++++++++++++++++++ .github/workflows/torchao.yml | 104 ++++++-------------- userbenchmark/torchao/install.py | 9 +- userbenchmark/torchao/run.py | 33 ++++--- 4 files changed, 163 insertions(+), 87 deletions(-) create mode 100644 .github/workflows/_linux-benchmark-cuda.yml diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml new file mode 100644 index 0000000000..934f71f9e8 --- /dev/null +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -0,0 +1,104 @@ +name: linux-benchmark-cuda +on: + workflow_call: + inputs: + userbenchmark: + required: true + type: string + description: Name of the benchmark + userbenchmark-install-args: + required: false + type: string + default: "" + description: Userbenchmark installation command line arguments + userbenchmark-run-args: + required: true + type: string + description: Userbenchmark run command line arguments + secrets: + HUGGING_FACE_HUB_TOKEN: + required: false + description: | + HF auth token to avoid rate limits when downloading models or datasets from hub + AWS_ACCESS_KEY_ID: + required: true + description: | + AWS access token for S3 uploading + AWS_SECRET_ACCESS_KEY: + required: true + description: | + AWS secret access key for S3 uploading + +jobs: + # Run a specific userbenchmark with given arguments + # Need to pass in userbenchmark name and arguments + benchmark: + # Don't run on forked repos + if: github.repository_owner == 'pytorch' + runs-on: [a100-runner] + timeout-minutes: 1440 # 24 hours + environment: docker-s3-upload + env: + BASE_CONDA_ENV: "torchbench" + CONDA_ENV: "userbenchmark" + SETUP_SCRIPT: "/workspace/setup_instance.sh" + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + steps: + - name: Checkout TorchBench + uses: actions/checkout@v3 + with: + path: benchmark + - name: Tune Nvidia GPU + run: | + sudo nvidia-smi -pm 1 + sudo nvidia-smi -ac 1215,1410 + sudo ldconfig + nvidia-smi + - name: Remove result if it already exists + if: always() + run: | + . "${SETUP_SCRIPT}" + # remove old results if exists + if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi + pushd benchmark + if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi + - name: Install benchmark + run: | + . "${SETUP_SCRIPT}" + pushd benchmark + bash ./.ci/torchbench/install.sh --userbenchmark ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-install-args }} + - name: Run benchmark + run: | + . "${SETUP_SCRIPT}" + pushd benchmark + # remove old results if exists + if [ -d ../benchmark-output ]; then rm -Rf benchmark-output; fi + if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi + python run_benchmark.py ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-run-args }} + - name: Copy the benchmark logs to benchmark-output + if: always() + run: | + pushd benchmark + cp -r ./.userbenchmark/${{ inputs.userbenchmark }} ../benchmark-output + - name: Upload result to GH Actions Artifact + uses: actions/upload-artifact@v3 + if: always() + with: + name: ${{ inputs.userbenchmark }} benchmarking result + path: benchmark-output/ + - name: Copy artifact and upload to Amazon S3 + env: + WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }} + WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }} + run: | + . "${SETUP_SCRIPT}" + pushd benchmark + # Upload the result json to Amazon S3 + python ./scripts/userbenchmark/upload_s3_csv.py --s3-prefix torchbench-csv --userbenchmark ${{ inputs.userbenchmark }} \ + --upload-path ../benchmark-output --match-filename "^${{ inputs.userbenchmark }}.*\.csv" + - name: Clean up Conda env + if: always() + run: | + . "${SETUP_SCRIPT}" + conda deactivate && conda deactivate + conda remove -n "${CONDA_ENV}" --all diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml index f82b7fb0f2..326a58ae14 100644 --- a/.github/workflows/torchao.yml +++ b/.github/workflows/torchao.yml @@ -1,80 +1,38 @@ name: Torchao nightly workflow (A100) on: workflow_dispatch: - + schedule: + - cron: '00 18 * * *' # run at 6:00 PM UTC, K8s containers will roll out at 12PM EST jobs: - run-benchmark: - environment: docker-s3-upload - env: - BASE_CONDA_ENV: "torchbench" - CONDA_ENV: "torchao-nightly" - PLATFORM_NAME: "gcp_a100" - SETUP_SCRIPT: "/workspace/setup_instance.sh" - TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }} + torchbench: + uses: ./.github/workflows/_linux-benchmark-cuda.yml + with: + userbenchmark: "torchao" + userbenchmark-run-args: "--ci --dashboard --torchbench" + secrets: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + timm: + uses: ./.github/workflows/_linux-benchmark-cuda.yml + with: + userbenchmark: "torchao" + userbenchmark-run-args: "--ci --dashboard --timm" + secrets: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - IS_GHA: 1 - BUILD_ENVIRONMENT: benchmark-nightly - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: [a100-runner] - timeout-minutes: 1440 # 24 hours - steps: - - name: Checkout TorchBench - uses: actions/checkout@v3 - with: - path: benchmark - - name: Tune Nvidia GPU - run: | - sudo nvidia-smi -pm 1 - sudo nvidia-smi -ac 1215,1410 - nvidia-smi - sudo ldconfig - - name: Clone and setup conda env - run: | - CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}" - conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}" - - name: Run the torchao userbenchmark - env: - WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }} - WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }} - run: | - . "${SETUP_SCRIPT}" - set -x - # remove old results if exists - if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi - pushd benchmark - if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi - # Install torchao - echo "Installing torchao" - pip uninstall -y torchao - python install.py --userbenchmark torchao - echo "Running the torchao userbenchmark" - python run_benchmark.py torchao --ci --dashboard - - name: Copy the benchmark logs to benchmark-output - if: always() - run: | - pushd benchmark - cp -r ./.userbenchmark/torchao ../benchmark-output - - name: Upload result to GH Actions Artifact - uses: actions/upload-artifact@v3 - if: always() - with: - name: Torchao nightly result - path: benchmark-output/ - - name: Copy artifact and upload to scribe and Amazon S3 - env: - WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }} - WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }} - run: | - . "${SETUP_SCRIPT}" - pushd benchmark - # Upload the result json to Amazon S3 - python ./scripts/userbenchmark/upload_s3_csv.py --s3-prefix torchbench-csv --userbenchmark torchao \ - --upload-path ../benchmark-output --match-filename "^torchao_.*\.csv" - - name: Clean up Conda env - if: always() - run: | - . "${SETUP_SCRIPT}" - conda deactivate && conda deactivate - conda remove -n "${CONDA_ENV}" --all + huggingface: + uses: ./.github/workflows/_linux-benchmark-cuda.yml + with: + userbenchmark: "torchao" + userbenchmark-run-args: "--ci --dashboard --huggingface" + secrets: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true diff --git a/userbenchmark/torchao/install.py b/userbenchmark/torchao/install.py index 9d491f6322..57af33f2c6 100644 --- a/userbenchmark/torchao/install.py +++ b/userbenchmark/torchao/install.py @@ -1,13 +1,18 @@ import os import subprocess +def uninstall_torchao(): + cmd = ["pip", "uninstall", "-y", "torchao"] + subprocess.check_call(cmd) + def install_torchao(): # Set ARCH list so that we can build fp16 with SM75+, the logic is copied from # pytorch/builder # https://github.com/pytorch/ao/blob/main/packaging/env_var_script_linux.sh#L16C1-L19 - torchao_env = os.environ + torchao_env = os.environ.copy() torchao_env["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6" subprocess.check_call(["pip", "install", "--pre", "git+https://github.com/pytorch/ao.git"], env=torchao_env) if __name__ == "__main__": - install_torchao() \ No newline at end of file + uninstall_torchao() + install_torchao() diff --git a/userbenchmark/torchao/run.py b/userbenchmark/torchao/run.py index ad55fb8afc..06f7379453 100644 --- a/userbenchmark/torchao/run.py +++ b/userbenchmark/torchao/run.py @@ -9,18 +9,16 @@ OUTPUT_DIR.mkdir(exist_ok=True, parents=True) CI_ARGS = [ - # TIMM - ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "noquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_noquant_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], - ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "noquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_noquant_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], - ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8dynamic_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], - ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8dynamic_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], - ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8weightonly_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], - ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8weightonly_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], - ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_autoquant_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], - ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_autoquant_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], + ["--progress", "--performance", "--inference", "--bfloat16", "--quantization", "noquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_noquant_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], + ["--progress", "--accuracy", "--inference", "--bfloat16", "--quantization", "noquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_noquant_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], + ["--progress", "--performance", "--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8dynamic_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], + ["--progress", "--accuracy", "--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8dynamic_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], + ["--progress", "--performance", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8weightonly_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], + ["--progress", "--accuracy", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8weightonly_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], + ["--progress", "--performance", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_autoquant_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], + ["--progress", "--accuracy", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_autoquant_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], ] - def _get_output(pt2_args): if "--output" in pt2_args: output_index = pt2_args.index("--output") @@ -28,7 +26,6 @@ def _get_output(pt2_args): return "not_available" - def _run_pt2_args(pt2_args: List[str]) -> str: from userbenchmark.dynamo.run import run as run_pt2_benchmark print(f"=================== [TORCHAO] Running PT2 Benchmark Runner with Args: {pt2_args} ===================") @@ -38,14 +35,26 @@ def _run_pt2_args(pt2_args: List[str]) -> str: def run(args: List[str]): parser = argparse.ArgumentParser() parser.add_argument("--ci", action="store_true", help="Run the CI workflow") + parser.add_argument("--timm", action="store_true", help="Run the TIMM CI workflow") + parser.add_argument("--huggingface", action="store_true", help="Run the Huggingface CI workflow") + parser.add_argument("--torchbench", action="store_true", help="Run the Torchbench CI workflow") parser.add_argument("--dashboard", action="store_true", help="Update the output files to prepare the S3 upload and dashboard.") args, pt2_args = parser.parse_known_args(args) if args.ci: group_pt2_args = CI_ARGS + if args.timm: + group_pt2_args.append("--timm") + elif args.huggingface: + group_pt2_args.append("--huggingface") + elif args.torchbench: + group_pt2_args.append("--torchbench") + else: + raise RuntimeError("CI mode must run with --timm, --huggingface, or --torchbench") else: group_pt2_args = [pt2_args] - + + output_files = [_run_pt2_args(pt2_args) for pt2_args in group_pt2_args] # Post-processing if args.dashboard: From 1fefb90fbe01571b25f02f29e16caeadb1c09c2f Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Wed, 26 Jun 2024 10:43:25 -0400 Subject: [PATCH 2/9] Bugfix --- .github/workflows/_linux-benchmark-cuda.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index 934f71f9e8..3f6c1531b0 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -57,11 +57,14 @@ jobs: - name: Remove result if it already exists if: always() run: | - . "${SETUP_SCRIPT}" # remove old results if exists if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi pushd benchmark if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi + - name: Clone and setup conda env + run: | + CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}" + conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}" - name: Install benchmark run: | . "${SETUP_SCRIPT}" @@ -75,12 +78,12 @@ jobs: if [ -d ../benchmark-output ]; then rm -Rf benchmark-output; fi if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi python run_benchmark.py ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-run-args }} - - name: Copy the benchmark logs to benchmark-output + - name: Copy benchmark logs if: always() run: | pushd benchmark cp -r ./.userbenchmark/${{ inputs.userbenchmark }} ../benchmark-output - - name: Upload result to GH Actions Artifact + - name: Upload benchmark result to GH Actions Artifact uses: actions/upload-artifact@v3 if: always() with: From 7003799951d92512c63a803b11f14b3f92122d55 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Wed, 26 Jun 2024 12:04:54 -0400 Subject: [PATCH 3/9] Update the ci file --- .github/workflows/torchao.yml | 36 ++++++++++++------------ userbenchmark/torchao/run.py | 52 +++++++++++++++++++++++------------ 2 files changed, 53 insertions(+), 35 deletions(-) diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml index 326a58ae14..3ff3e64a14 100644 --- a/.github/workflows/torchao.yml +++ b/.github/workflows/torchao.yml @@ -5,15 +5,6 @@ on: - cron: '00 18 * * *' # run at 6:00 PM UTC, K8s containers will roll out at 12PM EST jobs: - torchbench: - uses: ./.github/workflows/_linux-benchmark-cuda.yml - with: - userbenchmark: "torchao" - userbenchmark-run-args: "--ci --dashboard --torchbench" - secrets: - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} timm: uses: ./.github/workflows/_linux-benchmark-cuda.yml with: @@ -23,15 +14,24 @@ jobs: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - huggingface: - uses: ./.github/workflows/_linux-benchmark-cuda.yml - with: - userbenchmark: "torchao" - userbenchmark-run-args: "--ci --dashboard --huggingface" - secrets: - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + # torchbench: + # uses: ./.github/workflows/_linux-benchmark-cuda.yml + # with: + # userbenchmark: "torchao" + # userbenchmark-run-args: "--ci --dashboard --torchbench" + # secrets: + # HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + # AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + # AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + # huggingface: + # uses: ./.github/workflows/_linux-benchmark-cuda.yml + # with: + # userbenchmark: "torchao" + # userbenchmark-run-args: "--ci --dashboard --huggingface" + # secrets: + # HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + # AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + # AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} diff --git a/userbenchmark/torchao/run.py b/userbenchmark/torchao/run.py index 06f7379453..eb0af49824 100644 --- a/userbenchmark/torchao/run.py +++ b/userbenchmark/torchao/run.py @@ -1,4 +1,5 @@ import argparse +import itertools from userbenchmark.utils import get_output_dir from typing import List @@ -8,16 +9,35 @@ OUTPUT_DIR = get_output_dir(BM_NAME) OUTPUT_DIR.mkdir(exist_ok=True, parents=True) -CI_ARGS = [ - ["--progress", "--performance", "--inference", "--bfloat16", "--quantization", "noquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_noquant_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], - ["--progress", "--accuracy", "--inference", "--bfloat16", "--quantization", "noquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_noquant_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], - ["--progress", "--performance", "--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8dynamic_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], - ["--progress", "--accuracy", "--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8dynamic_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], - ["--progress", "--performance", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8weightonly_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], - ["--progress", "--accuracy", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8weightonly_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], - ["--progress", "--performance", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_autoquant_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], - ["--progress", "--accuracy", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_autoquant_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], -] + +def _get_ci_args(backend: str, modelset: str, dtype, mode: str, device: str, experiment: str) -> List[List[str]]: + if modelset == "timm": + modelset_full_name = "timm_models" + else: + modelset_full_name = modelset + output_file_name = f"torchao_{backend}_{modelset_full_name}_{dtype}_{mode}_{device}_{experiment}.csv" + ci_args = [ + "--progress", + f"--{modelset}", + "--quantization", + f"{backend}", + f"--{mode}", + f"--{dtype}", + f"--{experiment}", + "--output", + f"{str(OUTPUT_DIR.joinpath(output_file_name).resolve())}" + ] + return ci_args + +def _get_full_ci_args(modelset: str) -> List[List[str]]: + backends = ["autoquant", "int8dynamic", "int8weightonly", "noquant"] + modelset = [modelset] + dtype = ["bfloat16"] + mode = ["inference"] + device = ["cuda"] + experiment = ["performance", "accuracy"] + cfgs = itertools.product(*[backends, modelset, dtype, mode, device, experiment]) + return [ _get_ci_args(*cfg) for cfg in cfgs] def _get_output(pt2_args): if "--output" in pt2_args: @@ -42,20 +62,18 @@ def run(args: List[str]): args, pt2_args = parser.parse_known_args(args) if args.ci: - group_pt2_args = CI_ARGS if args.timm: - group_pt2_args.append("--timm") + benchmark_args = _get_full_ci_args(modelset="timm") elif args.huggingface: - group_pt2_args.append("--huggingface") + benchmark_args = _get_full_ci_args(modelset="huggingface") elif args.torchbench: - group_pt2_args.append("--torchbench") + benchmark_args = _get_full_ci_args(modelset="torchbench") else: raise RuntimeError("CI mode must run with --timm, --huggingface, or --torchbench") else: - group_pt2_args = [pt2_args] - + benchmark_args = [pt2_args] - output_files = [_run_pt2_args(pt2_args) for pt2_args in group_pt2_args] + output_files = [_run_pt2_args(args) for args in benchmark_args] # Post-processing if args.dashboard: post_ci_process(output_files) From 868b6b8a884c2d0dc07c71675a6e0c380ac8e87f Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Wed, 26 Jun 2024 12:08:11 -0400 Subject: [PATCH 4/9] Run install with the rest of the benchmark --- .ci/torchbench/install.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.ci/torchbench/install.sh b/.ci/torchbench/install.sh index 4828b67118..16dcfc6e8d 100644 --- a/.ci/torchbench/install.sh +++ b/.ci/torchbench/install.sh @@ -26,4 +26,5 @@ parent_dir=$(dirname "$(readlink -f "$0")")/../.. cd ${parent_dir} python -c "import torch; print(torch.__version__); print(torch.version.git_version)" -python install.py + +python install.py $@ From 7a98b8d4beec645050937d452f7875a2a83b4c6c Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Wed, 26 Jun 2024 15:40:31 -0400 Subject: [PATCH 5/9] Add quant api --- .github/workflows/_linux-benchmark-cuda.yml | 3 --- userbenchmark/dynamo/dynamobench/torchao_backend.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index 3f6c1531b0..b8903b02c3 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -74,9 +74,6 @@ jobs: run: | . "${SETUP_SCRIPT}" pushd benchmark - # remove old results if exists - if [ -d ../benchmark-output ]; then rm -Rf benchmark-output; fi - if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi python run_benchmark.py ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-run-args }} - name: Copy benchmark logs if: always() diff --git a/userbenchmark/dynamo/dynamobench/torchao_backend.py b/userbenchmark/dynamo/dynamobench/torchao_backend.py index 29e7d55d76..f02672928b 100644 --- a/userbenchmark/dynamo/dynamobench/torchao_backend.py +++ b/userbenchmark/dynamo/dynamobench/torchao_backend.py @@ -14,7 +14,7 @@ def setup_baseline(): def torchao_optimize_ctx(quantization: str): import torchao - from torchao.quantization import ( + from torchao.quantization.quant_api import ( change_linear_weights_to_int4_woqtensors, change_linear_weights_to_int8_dqtensors, change_linear_weights_to_int8_woqtensors, From 3c3544a01eb0a834fb3babbc170533e90e17d452 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Wed, 26 Jun 2024 15:59:25 -0400 Subject: [PATCH 6/9] Enable torchbench and huggingface models --- .github/workflows/torchao.yml | 36 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml index 3ff3e64a14..b5d43d9b9b 100644 --- a/.github/workflows/torchao.yml +++ b/.github/workflows/torchao.yml @@ -14,24 +14,24 @@ jobs: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - # torchbench: - # uses: ./.github/workflows/_linux-benchmark-cuda.yml - # with: - # userbenchmark: "torchao" - # userbenchmark-run-args: "--ci --dashboard --torchbench" - # secrets: - # HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - # AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - # AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - # huggingface: - # uses: ./.github/workflows/_linux-benchmark-cuda.yml - # with: - # userbenchmark: "torchao" - # userbenchmark-run-args: "--ci --dashboard --huggingface" - # secrets: - # HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - # AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - # AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + torchbench: + uses: ./.github/workflows/_linux-benchmark-cuda.yml + with: + userbenchmark: "torchao" + userbenchmark-run-args: "--ci --dashboard --torchbench" + secrets: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + huggingface: + uses: ./.github/workflows/_linux-benchmark-cuda.yml + with: + userbenchmark: "torchao" + userbenchmark-run-args: "--ci --dashboard --huggingface" + secrets: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} From fe9a9b7b4d9676c164c95b25490393587bab32bb Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Thu, 27 Jun 2024 12:40:38 -0400 Subject: [PATCH 7/9] Handle the case where output file does not exist. --- userbenchmark/torchao/upload.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/userbenchmark/torchao/upload.py b/userbenchmark/torchao/upload.py index 53dce17481..63d4f14cfe 100644 --- a/userbenchmark/torchao/upload.py +++ b/userbenchmark/torchao/upload.py @@ -2,6 +2,7 @@ import os import csv import subprocess +import warnings from pathlib import Path from typing import List @@ -33,7 +34,11 @@ def post_ci_process(output_files: List[str]): head_repo = "pytorch/ao" head_branch = "main" head_sha = _get_torchao_head_sha() - print(f"Processing file {path} ") + print(f"Processing file {path} ...") + # When the test fails to run or crashes, the output file does not exist. + if not path.exists(): + warnings.warn(f"Expected output file {path} does not exist.") + continue with open(path) as csvfile: reader = csv.DictReader(csvfile, delimiter=",") From 120a8bf45ce5b9937e4b6db472ebb20442d9c29c Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Thu, 27 Jun 2024 20:41:49 -0400 Subject: [PATCH 8/9] Fix s3 upload --- .github/workflows/_linux-benchmark-cuda.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index b8903b02c3..35c0c29649 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -43,6 +43,8 @@ jobs: CONDA_ENV: "userbenchmark" SETUP_SCRIPT: "/workspace/setup_instance.sh" HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} steps: - name: Checkout TorchBench uses: actions/checkout@v3 From d8f16d48831de9b9afd77de21dd6f9c84788a6e1 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Fri, 28 Jun 2024 08:52:26 -0400 Subject: [PATCH 9/9] Load multiple files --- userbenchmark/torchao/upload.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/userbenchmark/torchao/upload.py b/userbenchmark/torchao/upload.py index 63d4f14cfe..d1f1cb0286 100644 --- a/userbenchmark/torchao/upload.py +++ b/userbenchmark/torchao/upload.py @@ -71,6 +71,6 @@ def post_ci_process(output_files: List[str]): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--test-file", type=str, help="Add file to test.") + parser.add_argument("--test-files", nargs='+', help="Add files to test.") args = parser.parse_args() - post_ci_process([args.test_file]) + post_ci_process(args.test_files)