diff --git a/.ci/torchbench/install.sh b/.ci/torchbench/install.sh
index 4828b67118..16dcfc6e8d 100644
--- a/.ci/torchbench/install.sh
+++ b/.ci/torchbench/install.sh
@@ -26,4 +26,5 @@ parent_dir=$(dirname "$(readlink -f "$0")")/../..
 cd ${parent_dir}
 
 python -c "import torch; print(torch.__version__); print(torch.version.git_version)"
-python install.py
+
+python install.py "$@"
diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml
new file mode 100644
index 0000000000..35c0c29649
--- /dev/null
+++ b/.github/workflows/_linux-benchmark-cuda.yml
@@ -0,0 +1,106 @@
+name: linux-benchmark-cuda
+on:
+  workflow_call:
+    inputs:
+      userbenchmark:
+        required: true
+        type: string
+        description: Name of the benchmark
+      userbenchmark-install-args:
+        required: false
+        type: string
+        default: ""
+        description: Userbenchmark installation command line arguments
+      userbenchmark-run-args:
+        required: true
+        type: string
+        description: Userbenchmark run command line arguments
+    secrets:
+      HUGGING_FACE_HUB_TOKEN:
+        required: false
+        description: |
+          HF auth token to avoid rate limits when downloading models or datasets from the hub
+      AWS_ACCESS_KEY_ID:
+        required: true
+        description: |
+          AWS access key ID for S3 uploading
+      AWS_SECRET_ACCESS_KEY:
+        required: true
+        description: |
+          AWS secret access key for S3 uploading
+
+jobs:
+  # Run a specific userbenchmark with given arguments
+  # Need to pass in the userbenchmark name and arguments
+  benchmark:
+    # Don't run on forked repos
+    if: github.repository_owner == 'pytorch'
+    runs-on: [a100-runner]
+    timeout-minutes: 1440 # 24 hours
+    environment: docker-s3-upload
+    env:
+      BASE_CONDA_ENV: "torchbench"
+      CONDA_ENV: "userbenchmark"
+      SETUP_SCRIPT: "/workspace/setup_instance.sh"
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+    steps:
+      - name: Checkout TorchBench
+        uses: actions/checkout@v3
+        with:
+          path: benchmark
+      - name: Tune Nvidia GPU
+        run: |
+          sudo nvidia-smi -pm 1
+          sudo nvidia-smi -ac 1215,1410
+          sudo ldconfig
+          nvidia-smi
+      - name: Remove result if it already exists
+        if: always()
+        run: |
+          # remove old results if they exist
+          if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
+          pushd benchmark
+          if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
+      - name: Clone and setup conda env
+        run: |
+          CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}"
+          conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}"
+      - name: Install benchmark
+        run: |
+          . "${SETUP_SCRIPT}"
+          pushd benchmark
+          bash ./.ci/torchbench/install.sh --userbenchmark ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-install-args }}
+      - name: Run benchmark
+        run: |
+          . "${SETUP_SCRIPT}"
+          pushd benchmark
+          python run_benchmark.py ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-run-args }}
+      - name: Copy benchmark logs
+        if: always()
+        run: |
+          pushd benchmark
+          cp -r ./.userbenchmark/${{ inputs.userbenchmark }} ../benchmark-output
+      - name: Upload benchmark result to GH Actions Artifact
+        uses: actions/upload-artifact@v3
+        if: always()
+        with:
+          name: ${{ inputs.userbenchmark }} benchmarking result
+          path: benchmark-output/
+      - name: Copy artifact and upload to Amazon S3
+        env:
+          WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
+          WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }}
+        run: |
+          . "${SETUP_SCRIPT}"
+          pushd benchmark
+          # Upload the result CSV files to Amazon S3
+          python ./scripts/userbenchmark/upload_s3_csv.py --s3-prefix torchbench-csv --userbenchmark ${{ inputs.userbenchmark }} \
+            --upload-path ../benchmark-output --match-filename "^${{ inputs.userbenchmark }}.*\.csv"
+      - name: Clean up Conda env
+        if: always()
+        run: |
+          . "${SETUP_SCRIPT}"
+          conda deactivate && conda deactivate
+          conda remove -n "${CONDA_ENV}" --all
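Note on the `install.sh` change: quoting `"$@"` forwards each flag as its own word, so the reusable workflow's `--userbenchmark <name>` plus any extra install arguments reach `install.py` intact even if an argument contains spaces. A minimal sketch of a dispatcher consuming those forwarded flags, assuming a `userbenchmark/<name>/install.py` layout; the top-level `install.py` interface beyond `--userbenchmark` is not shown in this diff, so treat the dispatch below as hypothetical:

```python
# Hypothetical sketch (not the repo's actual install.py) of consuming the
# flags that install.sh now forwards via `python install.py "$@"`.
import argparse
import subprocess
import sys

def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--userbenchmark", help="install deps for one userbenchmark")
    # parse_known_args keeps unrecognized flags for the per-benchmark installer
    args, extra_args = parser.parse_known_args()
    if args.userbenchmark:
        # Assumed dispatch: run userbenchmark/<name>/install.py as a script,
        # forwarding the remaining install arguments.
        subprocess.check_call(
            [sys.executable, f"userbenchmark/{args.userbenchmark}/install.py", *extra_args]
        )

if __name__ == "__main__":
    main()
```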
"${SETUP_SCRIPT}" + pushd benchmark + # Upload the result json to Amazon S3 + python ./scripts/userbenchmark/upload_s3_csv.py --s3-prefix torchbench-csv --userbenchmark ${{ inputs.userbenchmark }} \ + --upload-path ../benchmark-output --match-filename "^${{ inputs.userbenchmark }}.*\.csv" + - name: Clean up Conda env + if: always() + run: | + . "${SETUP_SCRIPT}" + conda deactivate && conda deactivate + conda remove -n "${CONDA_ENV}" --all diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml index f82b7fb0f2..b5d43d9b9b 100644 --- a/.github/workflows/torchao.yml +++ b/.github/workflows/torchao.yml @@ -1,80 +1,38 @@ name: Torchao nightly workflow (A100) on: workflow_dispatch: - + schedule: + - cron: '00 18 * * *' # run at 6:00 PM UTC, K8s containers will roll out at 12PM EST jobs: - run-benchmark: - environment: docker-s3-upload - env: - BASE_CONDA_ENV: "torchbench" - CONDA_ENV: "torchao-nightly" - PLATFORM_NAME: "gcp_a100" - SETUP_SCRIPT: "/workspace/setup_instance.sh" - TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }} + timm: + uses: ./.github/workflows/_linux-benchmark-cuda.yml + with: + userbenchmark: "torchao" + userbenchmark-run-args: "--ci --dashboard --timm" + secrets: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + torchbench: + uses: ./.github/workflows/_linux-benchmark-cuda.yml + with: + userbenchmark: "torchao" + userbenchmark-run-args: "--ci --dashboard --torchbench" + secrets: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - IS_GHA: 1 - BUILD_ENVIRONMENT: benchmark-nightly - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: [a100-runner] - timeout-minutes: 1440 # 24 hours - steps: - - name: Checkout TorchBench - uses: actions/checkout@v3 - with: - path: benchmark - - name: Tune Nvidia GPU - run: | - sudo nvidia-smi -pm 1 - sudo nvidia-smi -ac 1215,1410 - nvidia-smi - sudo ldconfig - - name: Clone and setup conda env - run: | - CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}" - conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}" - - name: Run the torchao userbenchmark - env: - WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }} - WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }} - run: | - . "${SETUP_SCRIPT}" - set -x - # remove old results if exists - if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi - pushd benchmark - if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi - # Install torchao - echo "Installing torchao" - pip uninstall -y torchao - python install.py --userbenchmark torchao - echo "Running the torchao userbenchmark" - python run_benchmark.py torchao --ci --dashboard - - name: Copy the benchmark logs to benchmark-output - if: always() - run: | - pushd benchmark - cp -r ./.userbenchmark/torchao ../benchmark-output - - name: Upload result to GH Actions Artifact - uses: actions/upload-artifact@v3 - if: always() - with: - name: Torchao nightly result - path: benchmark-output/ - - name: Copy artifact and upload to scribe and Amazon S3 - env: - WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }} - WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }} - run: | - . 
"${SETUP_SCRIPT}" - pushd benchmark - # Upload the result json to Amazon S3 - python ./scripts/userbenchmark/upload_s3_csv.py --s3-prefix torchbench-csv --userbenchmark torchao \ - --upload-path ../benchmark-output --match-filename "^torchao_.*\.csv" - - name: Clean up Conda env - if: always() - run: | - . "${SETUP_SCRIPT}" - conda deactivate && conda deactivate - conda remove -n "${CONDA_ENV}" --all + huggingface: + uses: ./.github/workflows/_linux-benchmark-cuda.yml + with: + userbenchmark: "torchao" + userbenchmark-run-args: "--ci --dashboard --huggingface" + secrets: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true diff --git a/userbenchmark/dynamo/dynamobench/torchao_backend.py b/userbenchmark/dynamo/dynamobench/torchao_backend.py index 29e7d55d76..f02672928b 100644 --- a/userbenchmark/dynamo/dynamobench/torchao_backend.py +++ b/userbenchmark/dynamo/dynamobench/torchao_backend.py @@ -14,7 +14,7 @@ def setup_baseline(): def torchao_optimize_ctx(quantization: str): import torchao - from torchao.quantization import ( + from torchao.quantization.quant_api import ( change_linear_weights_to_int4_woqtensors, change_linear_weights_to_int8_dqtensors, change_linear_weights_to_int8_woqtensors, diff --git a/userbenchmark/torchao/install.py b/userbenchmark/torchao/install.py index 9d491f6322..57af33f2c6 100644 --- a/userbenchmark/torchao/install.py +++ b/userbenchmark/torchao/install.py @@ -1,13 +1,18 @@ import os import subprocess +def uninstall_torchao(): + cmd = ["pip", "uninstall", "-y", "torchao"] + subprocess.check_call(cmd) + def install_torchao(): # Set ARCH list so that we can build fp16 with SM75+, the logic is copied from # pytorch/builder # https://github.com/pytorch/ao/blob/main/packaging/env_var_script_linux.sh#L16C1-L19 - torchao_env = os.environ + torchao_env = os.environ.copy() torchao_env["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6" subprocess.check_call(["pip", "install", "--pre", "git+https://github.com/pytorch/ao.git"], env=torchao_env) if __name__ == "__main__": - install_torchao() \ No newline at end of file + uninstall_torchao() + install_torchao() diff --git a/userbenchmark/torchao/run.py b/userbenchmark/torchao/run.py index ad55fb8afc..eb0af49824 100644 --- a/userbenchmark/torchao/run.py +++ b/userbenchmark/torchao/run.py @@ -1,4 +1,5 @@ import argparse +import itertools from userbenchmark.utils import get_output_dir from typing import List @@ -8,18 +9,35 @@ OUTPUT_DIR = get_output_dir(BM_NAME) OUTPUT_DIR.mkdir(exist_ok=True, parents=True) -CI_ARGS = [ - # TIMM - ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "noquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_noquant_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], - ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "noquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_noquant_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], - ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8dynamic_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], - ["--progress", "--timm", "--accuracy", 
"--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8dynamic_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], - ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8weightonly_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], - ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8weightonly_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], - ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_autoquant_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], - ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_autoquant_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], -] +def _get_ci_args(backend: str, modelset: str, dtype, mode: str, device: str, experiment: str) -> List[List[str]]: + if modelset == "timm": + modelset_full_name = "timm_models" + else: + modelset_full_name = modelset + output_file_name = f"torchao_{backend}_{modelset_full_name}_{dtype}_{mode}_{device}_{experiment}.csv" + ci_args = [ + "--progress", + f"--{modelset}", + "--quantization", + f"{backend}", + f"--{mode}", + f"--{dtype}", + f"--{experiment}", + "--output", + f"{str(OUTPUT_DIR.joinpath(output_file_name).resolve())}" + ] + return ci_args + +def _get_full_ci_args(modelset: str) -> List[List[str]]: + backends = ["autoquant", "int8dynamic", "int8weightonly", "noquant"] + modelset = [modelset] + dtype = ["bfloat16"] + mode = ["inference"] + device = ["cuda"] + experiment = ["performance", "accuracy"] + cfgs = itertools.product(*[backends, modelset, dtype, mode, device, experiment]) + return [ _get_ci_args(*cfg) for cfg in cfgs] def _get_output(pt2_args): if "--output" in pt2_args: @@ -28,7 +46,6 @@ def _get_output(pt2_args): return "not_available" - def _run_pt2_args(pt2_args: List[str]) -> str: from userbenchmark.dynamo.run import run as run_pt2_benchmark print(f"=================== [TORCHAO] Running PT2 Benchmark Runner with Args: {pt2_args} ===================") @@ -38,15 +55,25 @@ def _run_pt2_args(pt2_args: List[str]) -> str: def run(args: List[str]): parser = argparse.ArgumentParser() parser.add_argument("--ci", action="store_true", help="Run the CI workflow") + parser.add_argument("--timm", action="store_true", help="Run the TIMM CI workflow") + parser.add_argument("--huggingface", action="store_true", help="Run the Huggingface CI workflow") + parser.add_argument("--torchbench", action="store_true", help="Run the Torchbench CI workflow") parser.add_argument("--dashboard", action="store_true", help="Update the output files to prepare the S3 upload and dashboard.") args, pt2_args = parser.parse_known_args(args) if args.ci: - group_pt2_args = CI_ARGS + if args.timm: + benchmark_args = _get_full_ci_args(modelset="timm") + elif args.huggingface: + benchmark_args = _get_full_ci_args(modelset="huggingface") + elif args.torchbench: + benchmark_args = _get_full_ci_args(modelset="torchbench") + else: + raise RuntimeError("CI mode must run with --timm, --huggingface, or --torchbench") else: - group_pt2_args = [pt2_args] - - output_files = [_run_pt2_args(pt2_args) for pt2_args in 
diff --git a/userbenchmark/torchao/upload.py b/userbenchmark/torchao/upload.py
index 53dce17481..d1f1cb0286 100644
--- a/userbenchmark/torchao/upload.py
+++ b/userbenchmark/torchao/upload.py
@@ -2,6 +2,7 @@
 import os
 import csv
 import subprocess
+import warnings
 
 from pathlib import Path
 from typing import List
@@ -33,7 +34,11 @@ def post_ci_process(output_files: List[str]):
         head_repo = "pytorch/ao"
         head_branch = "main"
         head_sha = _get_torchao_head_sha()
-        print(f"Processing file {path} ")
+        print(f"Processing file {path} ...")
+        # When the test fails to run or crashes, the output file does not exist.
+        if not path.exists():
+            warnings.warn(f"Expected output file {path} does not exist.")
+            continue
 
         with open(path) as csvfile:
             reader = csv.DictReader(csvfile, delimiter=",")
@@ -66,6 +71,6 @@
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--test-file", type=str, help="Add file to test.")
+    parser.add_argument("--test-files", nargs='+', help="Add files to test.")
    args = parser.parse_args()
-    post_ci_process([args.test_file])
+    post_ci_process(args.test_files)
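With the `--test-files` change, several result CSVs can be replayed through the post-processing step in one invocation, e.g. `python userbenchmark/torchao/upload.py --test-files a.csv b.csv`, and a missing file now emits a warning and is skipped instead of crashing the whole upload. An illustrative local replay (the CSV paths are hypothetical):

```python
# Illustrative replay of the post-processing entry point on local files;
# the paths below are hypothetical. Thanks to the new existence check,
# a nonexistent path only triggers a warning and is skipped.
from userbenchmark.torchao.upload import post_ci_process

post_ci_process([
    ".userbenchmark/torchao/torchao_noquant_timm_models_bfloat16_inference_cuda_performance.csv",
    ".userbenchmark/torchao/torchao_autoquant_timm_models_bfloat16_inference_cuda_accuracy.csv",
])
```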