From 05c47c4e235db004d59bf53d0f33ff751a1d3c02 Mon Sep 17 00:00:00 2001
From: Xu Zhao <xzhao9@meta.com>
Date: Wed, 26 Jun 2024 10:40:29 -0400
Subject: [PATCH 1/9] Install torchao

---
 .github/workflows/_linux-benchmark-cuda.yml | 104 ++++++++++++++++++++
 .github/workflows/torchao.yml               | 104 ++++++--------------
 userbenchmark/torchao/install.py            |   9 +-
 userbenchmark/torchao/run.py                |  33 ++++---
 4 files changed, 163 insertions(+), 87 deletions(-)
 create mode 100644 .github/workflows/_linux-benchmark-cuda.yml

diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml
new file mode 100644
index 0000000000..934f71f9e8
--- /dev/null
+++ b/.github/workflows/_linux-benchmark-cuda.yml
@@ -0,0 +1,104 @@
+name: linux-benchmark-cuda
+on:
+  workflow_call:
+    inputs:
+      userbenchmark:
+        required: true
+        type: string
+        description: Name of the benchmark
+      userbenchmark-install-args:
+        required: false
+        type: string
+        default: ""
+        description: Userbenchmark installation command line arguments
+      userbenchmark-run-args:
+        required: true
+        type: string
+        description: Userbenchmark run command line arguments
+    secrets:
+      HUGGING_FACE_HUB_TOKEN:
+        required: false
+        description: |
+          HF auth token to avoid rate limits when downloading models or datasets from hub
+      AWS_ACCESS_KEY_ID:
+        required: true
+        description: |
+          AWS access token for S3 uploading
+      AWS_SECRET_ACCESS_KEY:
+        required: true
+        description: |
+          AWS secret access key for S3 uploading
+
+jobs:
+  # Run a specific userbenchmark with given arguments
+  # Need to pass in userbenchmark name and arguments
+  benchmark:
+    # Don't run on forked repos
+    if: github.repository_owner == 'pytorch'
+    runs-on: [a100-runner]
+    timeout-minutes: 1440 # 24 hours
+    environment: docker-s3-upload
+    env:
+      BASE_CONDA_ENV: "torchbench"
+      CONDA_ENV: "userbenchmark"
+      SETUP_SCRIPT: "/workspace/setup_instance.sh"
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+    steps:
+      - name: Checkout TorchBench
+        uses: actions/checkout@v3
+        with:
+          path: benchmark
+      - name: Tune Nvidia GPU
+        run: |
+          sudo nvidia-smi -pm 1
+          sudo nvidia-smi -ac 1215,1410
+          sudo ldconfig
+          nvidia-smi
+      - name: Remove result if it already exists
+        if: always()
+        run: |
+          . "${SETUP_SCRIPT}"
+          # remove old results if exists
+          if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
+          pushd benchmark
+          if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
+      - name: Install benchmark
+        run: |
+          . "${SETUP_SCRIPT}"
+          pushd benchmark
+          bash ./.ci/torchbench/install.sh --userbenchmark ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-install-args }}
+      - name: Run benchmark
+        run: |
+          . "${SETUP_SCRIPT}"
+          pushd benchmark
+          # remove old results if exists
+          if [ -d ../benchmark-output ]; then rm -Rf benchmark-output; fi
+          if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
+          python run_benchmark.py ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-run-args }}
+      - name: Copy the benchmark logs to benchmark-output
+        if: always()
+        run: |
+          pushd benchmark
+          cp -r ./.userbenchmark/${{ inputs.userbenchmark }} ../benchmark-output
+      - name: Upload result to GH Actions Artifact
+        uses: actions/upload-artifact@v3
+        if: always()
+        with:
+          name: ${{ inputs.userbenchmark }} benchmarking result
+          path: benchmark-output/
+      - name: Copy artifact and upload to Amazon S3
+        env:
+          WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
+          WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }}
+        run: |
+          . "${SETUP_SCRIPT}"
+          pushd benchmark
+          # Upload the result CSV files to Amazon S3
+          python ./scripts/userbenchmark/upload_s3_csv.py --s3-prefix torchbench-csv --userbenchmark ${{ inputs.userbenchmark }} \
+                                                          --upload-path ../benchmark-output --match-filename "^${{ inputs.userbenchmark }}.*\.csv"
+      - name: Clean up Conda env
+        if: always()
+        run: |
+          . "${SETUP_SCRIPT}"
+          conda deactivate && conda deactivate
+          conda remove -n "${CONDA_ENV}" --all
diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml
index f82b7fb0f2..326a58ae14 100644
--- a/.github/workflows/torchao.yml
+++ b/.github/workflows/torchao.yml
@@ -1,80 +1,38 @@
 name: Torchao nightly workflow (A100)
 on:
   workflow_dispatch:
-
+  schedule:
+    - cron: '00 18 * * *' # run at 6:00 PM UTC, K8s containers will roll out at 12PM EST
 
 jobs:
-  run-benchmark:
-    environment: docker-s3-upload
-    env:
-      BASE_CONDA_ENV: "torchbench"
-      CONDA_ENV:  "torchao-nightly"
-      PLATFORM_NAME: "gcp_a100"
-      SETUP_SCRIPT: "/workspace/setup_instance.sh"
-      TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
+  torchbench:
+    uses: ./.github/workflows/_linux-benchmark-cuda.yml
+    with:
+      userbenchmark: "torchao"
+      userbenchmark-run-args: "--ci --dashboard --torchbench"
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+  timm:
+    uses: ./.github/workflows/_linux-benchmark-cuda.yml
+    with:
+      userbenchmark: "torchao"
+      userbenchmark-run-args: "--ci --dashboard --timm"
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
       AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
       AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-      IS_GHA: 1
-      BUILD_ENVIRONMENT: benchmark-nightly
-    if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: [a100-runner]
-    timeout-minutes: 1440 # 24 hours
-    steps:
-      - name: Checkout TorchBench
-        uses: actions/checkout@v3
-        with:
-          path: benchmark
-      - name: Tune Nvidia GPU
-        run: |
-          sudo nvidia-smi -pm 1
-          sudo nvidia-smi -ac 1215,1410
-          nvidia-smi
-          sudo ldconfig
-      - name: Clone and setup conda env
-        run: |
-          CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}"
-          conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}"
-      - name: Run the torchao userbenchmark
-        env:
-          WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
-          WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }}
-        run: |
-          . "${SETUP_SCRIPT}"
-          set -x
-          # remove old results if exists
-          if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
-          pushd benchmark
-          if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
-          # Install torchao
-          echo "Installing torchao"
-          pip uninstall -y torchao
-          python install.py --userbenchmark torchao
-          echo "Running the torchao userbenchmark"
-          python run_benchmark.py torchao --ci --dashboard
-      - name: Copy the benchmark logs to benchmark-output
-        if: always()
-        run: |
-          pushd benchmark
-          cp -r ./.userbenchmark/torchao ../benchmark-output
-      - name: Upload result to GH Actions Artifact
-        uses: actions/upload-artifact@v3
-        if: always()
-        with:
-          name: Torchao nightly result
-          path: benchmark-output/
-      - name: Copy artifact and upload to scribe and Amazon S3
-        env:
-          WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
-          WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }}
-        run: |
-          . "${SETUP_SCRIPT}"
-          pushd benchmark
-          # Upload the result json to Amazon S3
-          python ./scripts/userbenchmark/upload_s3_csv.py --s3-prefix torchbench-csv --userbenchmark torchao \
-                                                          --upload-path ../benchmark-output --match-filename "^torchao_.*\.csv"
-      - name: Clean up Conda env
-        if: always()
-        run: |
-          . "${SETUP_SCRIPT}"
-          conda deactivate && conda deactivate
-          conda remove -n "${CONDA_ENV}" --all
+  huggingface:
+    uses: ./.github/workflows/_linux-benchmark-cuda.yml
+    with:
+      userbenchmark: "torchao"
+      userbenchmark-run-args: "--ci --dashboard --huggingface"
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
diff --git a/userbenchmark/torchao/install.py b/userbenchmark/torchao/install.py
index 9d491f6322..57af33f2c6 100644
--- a/userbenchmark/torchao/install.py
+++ b/userbenchmark/torchao/install.py
@@ -1,13 +1,18 @@
 import os
 import subprocess
 
+def uninstall_torchao():
+    cmd = ["pip", "uninstall", "-y", "torchao"]
+    subprocess.check_call(cmd)
+
 def install_torchao():
     # Set ARCH list so that we can build fp16 with SM75+, the logic is copied from
     # pytorch/builder
     # https://github.com/pytorch/ao/blob/main/packaging/env_var_script_linux.sh#L16C1-L19
-    torchao_env = os.environ
+    torchao_env = os.environ.copy()
     torchao_env["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
     subprocess.check_call(["pip", "install", "--pre", "git+https://github.com/pytorch/ao.git"], env=torchao_env)
 
 if __name__ == "__main__":
-    install_torchao()
\ No newline at end of file
+    uninstall_torchao()
+    install_torchao()
diff --git a/userbenchmark/torchao/run.py b/userbenchmark/torchao/run.py
index ad55fb8afc..06f7379453 100644
--- a/userbenchmark/torchao/run.py
+++ b/userbenchmark/torchao/run.py
@@ -9,18 +9,16 @@
 OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
 
 CI_ARGS = [
-    # TIMM
-    ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "noquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_noquant_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"],
-    ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "noquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_noquant_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"],
-    ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8dynamic_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"],
-    ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8dynamic_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"],
-    ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8weightonly_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"],
-    ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8weightonly_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"],
-    ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_autoquant_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"],
-    ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_autoquant_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"],
+    ["--progress", "--performance", "--inference", "--bfloat16", "--quantization", "noquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_noquant_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"],
+    ["--progress", "--accuracy", "--inference", "--bfloat16", "--quantization", "noquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_noquant_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"],
+    ["--progress", "--performance", "--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8dynamic_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"],
+    ["--progress", "--accuracy", "--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8dynamic_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"],
+    ["--progress", "--performance", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8weightonly_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"],
+    ["--progress", "--accuracy", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8weightonly_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"],
+    ["--progress", "--performance", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_autoquant_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"],
+    ["--progress", "--accuracy", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_autoquant_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"],
 ]
 
-
 def _get_output(pt2_args):
     if "--output" in pt2_args:
         output_index = pt2_args.index("--output")
@@ -28,7 +26,6 @@ def _get_output(pt2_args):
     return "not_available"
 
 
-
 def _run_pt2_args(pt2_args: List[str]) -> str:
     from userbenchmark.dynamo.run import run as run_pt2_benchmark
     print(f"=================== [TORCHAO] Running PT2 Benchmark Runner with Args: {pt2_args} ===================")
@@ -38,14 +35,26 @@ def _run_pt2_args(pt2_args: List[str]) -> str:
 def run(args: List[str]):
     parser = argparse.ArgumentParser()
     parser.add_argument("--ci", action="store_true", help="Run the CI workflow")
+    parser.add_argument("--timm", action="store_true", help="Run the TIMM CI workflow")
+    parser.add_argument("--huggingface", action="store_true", help="Run the Huggingface CI workflow")
+    parser.add_argument("--torchbench", action="store_true", help="Run the Torchbench CI workflow")
     parser.add_argument("--dashboard", action="store_true", help="Update the output files to prepare the S3 upload and dashboard.")
     args, pt2_args = parser.parse_known_args(args)
 
     if args.ci:
         group_pt2_args = CI_ARGS
+        if args.timm:
+            group_pt2_args.append("--timm")
+        elif args.huggingface:
+            group_pt2_args.append("--huggingface")
+        elif args.torchbench:
+            group_pt2_args.append("--torchbench")
+        else:
+            raise RuntimeError("CI mode must run with --timm, --huggingface, or --torchbench")
     else:
         group_pt2_args = [pt2_args]
-    
+
+
     output_files = [_run_pt2_args(pt2_args) for pt2_args in group_pt2_args]
     # Post-processing
     if args.dashboard:

From 1fefb90fbe01571b25f02f29e16caeadb1c09c2f Mon Sep 17 00:00:00 2001
From: Xu Zhao <xzhao9@meta.com>
Date: Wed, 26 Jun 2024 10:43:25 -0400
Subject: [PATCH 2/9] Bugfix

---
 .github/workflows/_linux-benchmark-cuda.yml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml
index 934f71f9e8..3f6c1531b0 100644
--- a/.github/workflows/_linux-benchmark-cuda.yml
+++ b/.github/workflows/_linux-benchmark-cuda.yml
@@ -57,11 +57,14 @@ jobs:
       - name: Remove result if it already exists
         if: always()
         run: |
-          . "${SETUP_SCRIPT}"
           # remove old results if exists
           if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
           pushd benchmark
           if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
+      - name: Clone and setup conda env
+        run: |
+          CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}"
+          conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}"
       - name: Install benchmark
         run: |
           . "${SETUP_SCRIPT}"
@@ -75,12 +78,12 @@ jobs:
           if [ -d ../benchmark-output ]; then rm -Rf benchmark-output; fi
           if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
           python run_benchmark.py ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-run-args }}
-      - name: Copy the benchmark logs to benchmark-output
+      - name: Copy benchmark logs
         if: always()
         run: |
           pushd benchmark
           cp -r ./.userbenchmark/${{ inputs.userbenchmark }} ../benchmark-output
-      - name: Upload result to GH Actions Artifact
+      - name: Upload benchmark result to GH Actions Artifact
         uses: actions/upload-artifact@v3
         if: always()
         with:

From 7003799951d92512c63a803b11f14b3f92122d55 Mon Sep 17 00:00:00 2001
From: Xu Zhao <xzhao9@meta.com>
Date: Wed, 26 Jun 2024 12:04:54 -0400
Subject: [PATCH 3/9] Update the ci file

---
 .github/workflows/torchao.yml | 36 ++++++++++++------------
 userbenchmark/torchao/run.py  | 52 +++++++++++++++++++++++------------
 2 files changed, 53 insertions(+), 35 deletions(-)

diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml
index 326a58ae14..3ff3e64a14 100644
--- a/.github/workflows/torchao.yml
+++ b/.github/workflows/torchao.yml
@@ -5,15 +5,6 @@ on:
     - cron: '00 18 * * *' # run at 6:00 PM UTC, K8s containers will roll out at 12PM EST
 
 jobs:
-  torchbench:
-    uses: ./.github/workflows/_linux-benchmark-cuda.yml
-    with:
-      userbenchmark: "torchao"
-      userbenchmark-run-args: "--ci --dashboard --torchbench"
-    secrets:
-      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
-      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
   timm:
     uses: ./.github/workflows/_linux-benchmark-cuda.yml
     with:
@@ -23,15 +14,24 @@ jobs:
       HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
       AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
       AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-  huggingface:
-    uses: ./.github/workflows/_linux-benchmark-cuda.yml
-    with:
-      userbenchmark: "torchao"
-      userbenchmark-run-args: "--ci --dashboard --huggingface"
-    secrets:
-      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
-      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+  # torchbench:
+  #   uses: ./.github/workflows/_linux-benchmark-cuda.yml
+  #   with:
+  #     userbenchmark: "torchao"
+  #     userbenchmark-run-args: "--ci --dashboard --torchbench"
+  #   secrets:
+  #     HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+  #     AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+  #     AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+  # huggingface:
+  #   uses: ./.github/workflows/_linux-benchmark-cuda.yml
+  #   with:
+  #     userbenchmark: "torchao"
+  #     userbenchmark-run-args: "--ci --dashboard --huggingface"
+  #   secrets:
+  #     HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+  #     AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+  #     AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
diff --git a/userbenchmark/torchao/run.py b/userbenchmark/torchao/run.py
index 06f7379453..eb0af49824 100644
--- a/userbenchmark/torchao/run.py
+++ b/userbenchmark/torchao/run.py
@@ -1,4 +1,5 @@
 import argparse
+import itertools
 
 from userbenchmark.utils import get_output_dir
 from typing import List
@@ -8,16 +9,35 @@
 OUTPUT_DIR = get_output_dir(BM_NAME)
 OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
 
-CI_ARGS = [
-    ["--progress", "--performance", "--inference", "--bfloat16", "--quantization", "noquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_noquant_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"],
-    ["--progress", "--accuracy", "--inference", "--bfloat16", "--quantization", "noquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_noquant_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"],
-    ["--progress", "--performance", "--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8dynamic_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"],
-    ["--progress", "--accuracy", "--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8dynamic_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"],
-    ["--progress", "--performance", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8weightonly_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"],
-    ["--progress", "--accuracy", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8weightonly_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"],
-    ["--progress", "--performance", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_autoquant_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"],
-    ["--progress", "--accuracy", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_autoquant_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"],
-]
+
+def _get_ci_args(backend: str, modelset: str, dtype: str, mode: str, device: str, experiment: str) -> List[str]:
+    """Build the PT2 runner argument list for a single torchao CI configuration."""
+    # Only the "timm" model set carries a "_models" suffix in its CSV file name.
+    # `device` only contributes to the output file name; no device flag is emitted.
+    modelset_full_name = "timm_models" if modelset == "timm" else modelset
+    output_file_name = f"torchao_{backend}_{modelset_full_name}_{dtype}_{mode}_{device}_{experiment}.csv"
+    ci_args = [
+        "--progress",
+        f"--{modelset}",
+        "--quantization",
+        f"{backend}",
+        f"--{mode}",
+        f"--{dtype}",
+        f"--{experiment}",
+        "--output",
+        f"{str(OUTPUT_DIR.joinpath(output_file_name).resolve())}"
+    ]
+    return ci_args
+
+def _get_full_ci_args(modelset: str) -> List[List[str]]:
+    """Return one PT2 argument list per (backend, experiment) pair for `modelset`."""
+    # The model set, dtype, mode and device each have a single value here, so the
+    # cartesian product below only fans out over the backends and the experiments.
+    quant_backends = ["autoquant", "int8dynamic", "int8weightonly", "noquant"]
+    experiments = ["performance", "accuracy"]
+    fixed_axes = ([modelset], ["bfloat16"], ["inference"], ["cuda"])
+    configs = itertools.product(quant_backends, *fixed_axes, experiments)
+    return [_get_ci_args(*config) for config in configs]
 
 def _get_output(pt2_args):
     if "--output" in pt2_args:
@@ -42,20 +62,18 @@ def run(args: List[str]):
     args, pt2_args = parser.parse_known_args(args)
 
     if args.ci:
-        group_pt2_args = CI_ARGS
         if args.timm:
-            group_pt2_args.append("--timm")
+            benchmark_args = _get_full_ci_args(modelset="timm")
         elif args.huggingface:
-            group_pt2_args.append("--huggingface")
+            benchmark_args = _get_full_ci_args(modelset="huggingface")
         elif args.torchbench:
-            group_pt2_args.append("--torchbench")
+            benchmark_args = _get_full_ci_args(modelset="torchbench")
         else:
             raise RuntimeError("CI mode must run with --timm, --huggingface, or --torchbench")
     else:
-        group_pt2_args = [pt2_args]
-
+        benchmark_args = [pt2_args]
 
-    output_files = [_run_pt2_args(pt2_args) for pt2_args in group_pt2_args]
+    output_files = [_run_pt2_args(args) for args in benchmark_args]
     # Post-processing
     if args.dashboard:
         post_ci_process(output_files)

From 868b6b8a884c2d0dc07c71675a6e0c380ac8e87f Mon Sep 17 00:00:00 2001
From: Xu Zhao <xzhao9@meta.com>
Date: Wed, 26 Jun 2024 12:08:11 -0400
Subject: [PATCH 4/9] Run install with the rest of the benchmark

---
 .ci/torchbench/install.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.ci/torchbench/install.sh b/.ci/torchbench/install.sh
index 4828b67118..16dcfc6e8d 100644
--- a/.ci/torchbench/install.sh
+++ b/.ci/torchbench/install.sh
@@ -26,4 +26,5 @@ parent_dir=$(dirname "$(readlink -f "$0")")/../..
 cd ${parent_dir}
 
 python -c "import torch; print(torch.__version__); print(torch.version.git_version)"
-python install.py
+# Forward this script's arguments to install.py; quoting keeps each one intact.
+python install.py "$@"

From 7a98b8d4beec645050937d452f7875a2a83b4c6c Mon Sep 17 00:00:00 2001
From: Xu Zhao <xzhao9@meta.com>
Date: Wed, 26 Jun 2024 15:40:31 -0400
Subject: [PATCH 5/9] Add quant api

---
 .github/workflows/_linux-benchmark-cuda.yml         | 3 ---
 userbenchmark/dynamo/dynamobench/torchao_backend.py | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml
index 3f6c1531b0..b8903b02c3 100644
--- a/.github/workflows/_linux-benchmark-cuda.yml
+++ b/.github/workflows/_linux-benchmark-cuda.yml
@@ -74,9 +74,6 @@ jobs:
         run: |
           . "${SETUP_SCRIPT}"
           pushd benchmark
-          # remove old results if exists
-          if [ -d ../benchmark-output ]; then rm -Rf benchmark-output; fi
-          if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
           python run_benchmark.py ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-run-args }}
       - name: Copy benchmark logs
         if: always()
diff --git a/userbenchmark/dynamo/dynamobench/torchao_backend.py b/userbenchmark/dynamo/dynamobench/torchao_backend.py
index 29e7d55d76..f02672928b 100644
--- a/userbenchmark/dynamo/dynamobench/torchao_backend.py
+++ b/userbenchmark/dynamo/dynamobench/torchao_backend.py
@@ -14,7 +14,7 @@ def setup_baseline():
 
 def torchao_optimize_ctx(quantization: str):
     import torchao
-    from torchao.quantization import (
+    from torchao.quantization.quant_api import (
         change_linear_weights_to_int4_woqtensors,
         change_linear_weights_to_int8_dqtensors,
         change_linear_weights_to_int8_woqtensors,

From 3c3544a01eb0a834fb3babbc170533e90e17d452 Mon Sep 17 00:00:00 2001
From: Xu Zhao <xzhao9@meta.com>
Date: Wed, 26 Jun 2024 15:59:25 -0400
Subject: [PATCH 6/9] Enable torchbench and huggingface models

---
 .github/workflows/torchao.yml | 36 +++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml
index 3ff3e64a14..b5d43d9b9b 100644
--- a/.github/workflows/torchao.yml
+++ b/.github/workflows/torchao.yml
@@ -14,24 +14,24 @@ jobs:
       HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
       AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
       AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-  # torchbench:
-  #   uses: ./.github/workflows/_linux-benchmark-cuda.yml
-  #   with:
-  #     userbenchmark: "torchao"
-  #     userbenchmark-run-args: "--ci --dashboard --torchbench"
-  #   secrets:
-  #     HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
-  #     AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-  #     AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-  # huggingface:
-  #   uses: ./.github/workflows/_linux-benchmark-cuda.yml
-  #   with:
-  #     userbenchmark: "torchao"
-  #     userbenchmark-run-args: "--ci --dashboard --huggingface"
-  #   secrets:
-  #     HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
-  #     AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-  #     AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+  torchbench:
+    uses: ./.github/workflows/_linux-benchmark-cuda.yml
+    with:
+      userbenchmark: "torchao"
+      userbenchmark-run-args: "--ci --dashboard --torchbench"
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+  huggingface:
+    uses: ./.github/workflows/_linux-benchmark-cuda.yml
+    with:
+      userbenchmark: "torchao"
+      userbenchmark-run-args: "--ci --dashboard --huggingface"
+    secrets:
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}

From fe9a9b7b4d9676c164c95b25490393587bab32bb Mon Sep 17 00:00:00 2001
From: Xu Zhao <xzhao9@meta.com>
Date: Thu, 27 Jun 2024 12:40:38 -0400
Subject: [PATCH 7/9] Handle the case where output file does not exist.

---
 userbenchmark/torchao/upload.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/userbenchmark/torchao/upload.py b/userbenchmark/torchao/upload.py
index 53dce17481..63d4f14cfe 100644
--- a/userbenchmark/torchao/upload.py
+++ b/userbenchmark/torchao/upload.py
@@ -2,6 +2,7 @@
 import os
 import csv
 import subprocess
+import warnings
 from pathlib import Path
 from typing import List
 
@@ -33,7 +34,11 @@ def post_ci_process(output_files: List[str]):
         head_repo = "pytorch/ao"
         head_branch = "main"
         head_sha = _get_torchao_head_sha()
-        print(f"Processing file {path} ")
+        print(f"Processing file {path} ...")
+        # When the test fails to run or crashes, the output file does not exist.
+        if not path.exists():
+            warnings.warn(f"Expected output file {path} does not exist.")
+            continue
         with open(path) as csvfile:
             reader = csv.DictReader(csvfile, delimiter=",")
 

From 120a8bf45ce5b9937e4b6db472ebb20442d9c29c Mon Sep 17 00:00:00 2001
From: Xu Zhao <xzhao9@meta.com>
Date: Thu, 27 Jun 2024 20:41:49 -0400
Subject: [PATCH 8/9] Fix s3 upload

---
 .github/workflows/_linux-benchmark-cuda.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml
index b8903b02c3..35c0c29649 100644
--- a/.github/workflows/_linux-benchmark-cuda.yml
+++ b/.github/workflows/_linux-benchmark-cuda.yml
@@ -43,6 +43,8 @@ jobs:
       CONDA_ENV: "userbenchmark"
       SETUP_SCRIPT: "/workspace/setup_instance.sh"
       HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
     steps:
       - name: Checkout TorchBench
         uses: actions/checkout@v3

From d8f16d48831de9b9afd77de21dd6f9c84788a6e1 Mon Sep 17 00:00:00 2001
From: Xu Zhao <i@xuzhao.net>
Date: Fri, 28 Jun 2024 08:52:26 -0400
Subject: [PATCH 9/9] Load multiple files

---
 userbenchmark/torchao/upload.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/userbenchmark/torchao/upload.py b/userbenchmark/torchao/upload.py
index 63d4f14cfe..d1f1cb0286 100644
--- a/userbenchmark/torchao/upload.py
+++ b/userbenchmark/torchao/upload.py
@@ -71,6 +71,6 @@ def post_ci_process(output_files: List[str]):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--test-file", type=str, help="Add file to test.")
+    parser.add_argument("--test-files", nargs='+', required=True, help="Add files to test.")
     args = parser.parse_args()
-    post_ci_process([args.test_file])
+    post_ci_process(args.test_files)