diff --git a/.ci/torchbench/install.sh b/.ci/torchbench/install.sh
index 4828b67118..16dcfc6e8d 100644
--- a/.ci/torchbench/install.sh
+++ b/.ci/torchbench/install.sh
@@ -26,4 +26,5 @@ parent_dir=$(dirname "$(readlink -f "$0")")/../..
 cd ${parent_dir}
 
 python -c "import torch; print(torch.__version__); print(torch.version.git_version)"
-python install.py
+
+python install.py "$@"
diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml
new file mode 100644
index 0000000000..35c0c29649
--- /dev/null
+++ b/.github/workflows/_linux-benchmark-cuda.yml
@@ -0,0 +1,106 @@
+name: linux-benchmark-cuda
+on:
+  workflow_call:
+    inputs:
+      userbenchmark:
+        required: true
+        type: string
+        description: Name of the benchmark
+      userbenchmark-install-args:
+        required: false
+        type: string
+        default: ""
+        description: Userbenchmark installation command line arguments
+      userbenchmark-run-args:
+        required: true
+        type: string
+        description: Userbenchmark run command line arguments
+    secrets:
+      HUGGING_FACE_HUB_TOKEN:
+        required: false
+        description: |
+          HF auth token to avoid rate limits when downloading models or datasets from the hub
+      AWS_ACCESS_KEY_ID:
+        required: true
+        description: |
+          AWS access key ID for S3 uploading
+      AWS_SECRET_ACCESS_KEY:
+        required: true
+        description: |
+          AWS secret access key for S3 uploading
+
+jobs:
+  # Run a specific userbenchmark with given arguments
+  # Need to pass in the userbenchmark name and arguments
+  benchmark:
+    # Don't run on forked repos
+    if: github.repository_owner == 'pytorch'
+    runs-on: [a100-runner]
+    timeout-minutes: 1440 # 24 hours
+    environment: docker-s3-upload
+    env:
+      BASE_CONDA_ENV: "torchbench"
+      CONDA_ENV: "userbenchmark"
+      SETUP_SCRIPT: "/workspace/setup_instance.sh"
+      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+    steps:
+      - name: Checkout TorchBench
+        uses: actions/checkout@v3
+        with:
+          path: benchmark
+      - name: Tune Nvidia GPU
+        run: |
+          sudo nvidia-smi -pm 1
+          sudo nvidia-smi -ac 1215,1410
+          sudo ldconfig
+          nvidia-smi
+      - name: Remove result if it already exists
+        if: always()
+        run: |
+          # remove old results if they exist
+          if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
+          pushd benchmark
+          if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
+      - name: Clone and setup conda env
+        run: |
+          CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}"
+          conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}"
+      - name: Install benchmark
+        run: |
+          . "${SETUP_SCRIPT}"
+          pushd benchmark
+          bash ./.ci/torchbench/install.sh --userbenchmark ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-install-args }}
+      - name: Run benchmark
+        run: |
+          . "${SETUP_SCRIPT}"
+          pushd benchmark
+          python run_benchmark.py ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-run-args }}
+      - name: Copy benchmark logs
+        if: always()
+        run: |
+          pushd benchmark
+          cp -r ./.userbenchmark/${{ inputs.userbenchmark }} ../benchmark-output
+      - name: Upload benchmark result to GH Actions Artifact
+        uses: actions/upload-artifact@v3
+        if: always()
+        with:
+          name: ${{ inputs.userbenchmark }} benchmarking result
+          path: benchmark-output/
+      - name: Copy artifact and upload to Amazon S3
+        env:
+          WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
+          WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }}
+        run: |
+          . "${SETUP_SCRIPT}"
+          pushd benchmark
+          # Upload the result CSV files to Amazon S3
+          python ./scripts/userbenchmark/upload_s3_csv.py --s3-prefix torchbench-csv --userbenchmark ${{ inputs.userbenchmark }} \
+            --upload-path ../benchmark-output --match-filename "^${{ inputs.userbenchmark }}.*\.csv"
+      - name: Clean up Conda env
+        if: always()
+        run: |
+          . "${SETUP_SCRIPT}"
+          conda deactivate && conda deactivate
+          conda remove -n "${CONDA_ENV}" --all
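Note on the `install.sh` change: quoting `"$@"` forwards each flag as its own word, so the reusable workflow's `--userbenchmark <name>` plus any extra install arguments reach `install.py` intact even if an argument contains spaces. A minimal sketch of a dispatcher consuming those forwarded flags, assuming a `userbenchmark/<name>/install.py` layout; the top-level `install.py` interface beyond `--userbenchmark` is not shown in this diff, so treat the dispatch below as hypothetical:

```python
# Hypothetical sketch (not the repo's actual install.py) of consuming the
# flags that install.sh now forwards via `python install.py "$@"`.
import argparse
import subprocess
import sys

def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--userbenchmark", help="install deps for one userbenchmark")
    # parse_known_args keeps unrecognized flags for the per-benchmark installer
    args, extra_args = parser.parse_known_args()
    if args.userbenchmark:
        # Assumed dispatch: run userbenchmark/<name>/install.py as a script,
        # forwarding the remaining install arguments.
        subprocess.check_call(
            [sys.executable, f"userbenchmark/{args.userbenchmark}/install.py", *extra_args]
        )

if __name__ == "__main__":
    main()
```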
"${SETUP_SCRIPT}" + pushd benchmark + # Upload the result json to Amazon S3 + python ./scripts/userbenchmark/upload_s3_csv.py --s3-prefix torchbench-csv --userbenchmark ${{ inputs.userbenchmark }} \ + --upload-path ../benchmark-output --match-filename "^${{ inputs.userbenchmark }}.*\.csv" + - name: Clean up Conda env + if: always() + run: | + . "${SETUP_SCRIPT}" + conda deactivate && conda deactivate + conda remove -n "${CONDA_ENV}" --all diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml index f82b7fb0f2..b5d43d9b9b 100644 --- a/.github/workflows/torchao.yml +++ b/.github/workflows/torchao.yml @@ -1,80 +1,38 @@ name: Torchao nightly workflow (A100) on: workflow_dispatch: - + schedule: + - cron: '00 18 * * *' # run at 6:00 PM UTC, K8s containers will roll out at 12PM EST jobs: - run-benchmark: - environment: docker-s3-upload - env: - BASE_CONDA_ENV: "torchbench" - CONDA_ENV: "torchao-nightly" - PLATFORM_NAME: "gcp_a100" - SETUP_SCRIPT: "/workspace/setup_instance.sh" - TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }} + timm: + uses: ./.github/workflows/_linux-benchmark-cuda.yml + with: + userbenchmark: "torchao" + userbenchmark-run-args: "--ci --dashboard --timm" + secrets: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + torchbench: + uses: ./.github/workflows/_linux-benchmark-cuda.yml + with: + userbenchmark: "torchao" + userbenchmark-run-args: "--ci --dashboard --torchbench" + secrets: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - IS_GHA: 1 - BUILD_ENVIRONMENT: benchmark-nightly - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: [a100-runner] - timeout-minutes: 1440 # 24 hours - steps: - - name: Checkout TorchBench - uses: actions/checkout@v3 - with: - path: benchmark - - name: Tune Nvidia GPU - run: | - sudo nvidia-smi -pm 1 - sudo nvidia-smi -ac 1215,1410 - nvidia-smi - sudo ldconfig - - name: Clone and setup conda env - run: | - CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}" - conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}" - - name: Run the torchao userbenchmark - env: - WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }} - WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }} - run: | - . "${SETUP_SCRIPT}" - set -x - # remove old results if exists - if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi - pushd benchmark - if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi - # Install torchao - echo "Installing torchao" - pip uninstall -y torchao - python install.py --userbenchmark torchao - echo "Running the torchao userbenchmark" - python run_benchmark.py torchao --ci --dashboard - - name: Copy the benchmark logs to benchmark-output - if: always() - run: | - pushd benchmark - cp -r ./.userbenchmark/torchao ../benchmark-output - - name: Upload result to GH Actions Artifact - uses: actions/upload-artifact@v3 - if: always() - with: - name: Torchao nightly result - path: benchmark-output/ - - name: Copy artifact and upload to scribe and Amazon S3 - env: - WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }} - WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }} - run: | - . 
"${SETUP_SCRIPT}" - pushd benchmark - # Upload the result json to Amazon S3 - python ./scripts/userbenchmark/upload_s3_csv.py --s3-prefix torchbench-csv --userbenchmark torchao \ - --upload-path ../benchmark-output --match-filename "^torchao_.*\.csv" - - name: Clean up Conda env - if: always() - run: | - . "${SETUP_SCRIPT}" - conda deactivate && conda deactivate - conda remove -n "${CONDA_ENV}" --all + huggingface: + uses: ./.github/workflows/_linux-benchmark-cuda.yml + with: + userbenchmark: "torchao" + userbenchmark-run-args: "--ci --dashboard --huggingface" + secrets: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true diff --git a/userbenchmark/dynamo/dynamobench/torchao_backend.py b/userbenchmark/dynamo/dynamobench/torchao_backend.py index 29e7d55d76..f02672928b 100644 --- a/userbenchmark/dynamo/dynamobench/torchao_backend.py +++ b/userbenchmark/dynamo/dynamobench/torchao_backend.py @@ -14,7 +14,7 @@ def setup_baseline(): def torchao_optimize_ctx(quantization: str): import torchao - from torchao.quantization import ( + from torchao.quantization.quant_api import ( change_linear_weights_to_int4_woqtensors, change_linear_weights_to_int8_dqtensors, change_linear_weights_to_int8_woqtensors, diff --git a/userbenchmark/torchao/install.py b/userbenchmark/torchao/install.py index 9d491f6322..57af33f2c6 100644 --- a/userbenchmark/torchao/install.py +++ b/userbenchmark/torchao/install.py @@ -1,13 +1,18 @@ import os import subprocess +def uninstall_torchao(): + cmd = ["pip", "uninstall", "-y", "torchao"] + subprocess.check_call(cmd) + def install_torchao(): # Set ARCH list so that we can build fp16 with SM75+, the logic is copied from # pytorch/builder # https://github.com/pytorch/ao/blob/main/packaging/env_var_script_linux.sh#L16C1-L19 - torchao_env = os.environ + torchao_env = os.environ.copy() torchao_env["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6" subprocess.check_call(["pip", "install", "--pre", "git+https://github.com/pytorch/ao.git"], env=torchao_env) if __name__ == "__main__": - install_torchao() \ No newline at end of file + uninstall_torchao() + install_torchao() diff --git a/userbenchmark/torchao/run.py b/userbenchmark/torchao/run.py index ad55fb8afc..eb0af49824 100644 --- a/userbenchmark/torchao/run.py +++ b/userbenchmark/torchao/run.py @@ -1,4 +1,5 @@ import argparse +import itertools from userbenchmark.utils import get_output_dir from typing import List @@ -8,18 +9,35 @@ OUTPUT_DIR = get_output_dir(BM_NAME) OUTPUT_DIR.mkdir(exist_ok=True, parents=True) -CI_ARGS = [ - # TIMM - ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "noquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_noquant_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], - ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "noquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_noquant_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], - ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8dynamic_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], - ["--progress", "--timm", "--accuracy", 
"--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8dynamic_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], - ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8weightonly_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], - ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_int8weightonly_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], - ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_autoquant_timm_models_bfloat16_inference_cuda_performance.csv').resolve())}"], - ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", f"{str(OUTPUT_DIR.joinpath('torchao_autoquant_timm_models_bfloat16_inference_cuda_accuracy.csv').resolve())}"], -] +def _get_ci_args(backend: str, modelset: str, dtype, mode: str, device: str, experiment: str) -> List[List[str]]: + if modelset == "timm": + modelset_full_name = "timm_models" + else: + modelset_full_name = modelset + output_file_name = f"torchao_{backend}_{modelset_full_name}_{dtype}_{mode}_{device}_{experiment}.csv" + ci_args = [ + "--progress", + f"--{modelset}", + "--quantization", + f"{backend}", + f"--{mode}", + f"--{dtype}", + f"--{experiment}", + "--output", + f"{str(OUTPUT_DIR.joinpath(output_file_name).resolve())}" + ] + return ci_args + +def _get_full_ci_args(modelset: str) -> List[List[str]]: + backends = ["autoquant", "int8dynamic", "int8weightonly", "noquant"] + modelset = [modelset] + dtype = ["bfloat16"] + mode = ["inference"] + device = ["cuda"] + experiment = ["performance", "accuracy"] + cfgs = itertools.product(*[backends, modelset, dtype, mode, device, experiment]) + return [ _get_ci_args(*cfg) for cfg in cfgs] def _get_output(pt2_args): if "--output" in pt2_args: @@ -28,7 +46,6 @@ def _get_output(pt2_args): return "not_available" - def _run_pt2_args(pt2_args: List[str]) -> str: from userbenchmark.dynamo.run import run as run_pt2_benchmark print(f"=================== [TORCHAO] Running PT2 Benchmark Runner with Args: {pt2_args} ===================") @@ -38,15 +55,25 @@ def _run_pt2_args(pt2_args: List[str]) -> str: def run(args: List[str]): parser = argparse.ArgumentParser() parser.add_argument("--ci", action="store_true", help="Run the CI workflow") + parser.add_argument("--timm", action="store_true", help="Run the TIMM CI workflow") + parser.add_argument("--huggingface", action="store_true", help="Run the Huggingface CI workflow") + parser.add_argument("--torchbench", action="store_true", help="Run the Torchbench CI workflow") parser.add_argument("--dashboard", action="store_true", help="Update the output files to prepare the S3 upload and dashboard.") args, pt2_args = parser.parse_known_args(args) if args.ci: - group_pt2_args = CI_ARGS + if args.timm: + benchmark_args = _get_full_ci_args(modelset="timm") + elif args.huggingface: + benchmark_args = _get_full_ci_args(modelset="huggingface") + elif args.torchbench: + benchmark_args = _get_full_ci_args(modelset="torchbench") + else: + raise RuntimeError("CI mode must run with --timm, --huggingface, or --torchbench") else: - group_pt2_args = [pt2_args] - - output_files = [_run_pt2_args(pt2_args) for pt2_args in 
diff --git a/userbenchmark/torchao/upload.py b/userbenchmark/torchao/upload.py
index 53dce17481..d1f1cb0286 100644
--- a/userbenchmark/torchao/upload.py
+++ b/userbenchmark/torchao/upload.py
@@ -2,6 +2,7 @@
 import os
 import csv
 import subprocess
+import warnings
 
 from pathlib import Path
 from typing import List
@@ -33,7 +34,11 @@ def post_ci_process(output_files: List[str]):
         head_repo = "pytorch/ao"
         head_branch = "main"
         head_sha = _get_torchao_head_sha()
-        print(f"Processing file {path} ")
+        print(f"Processing file {path} ...")
+        # When the test fails to run or crashes, the output file does not exist.
+        if not path.exists():
+            warnings.warn(f"Expected output file {path} does not exist.")
+            continue
 
         with open(path) as csvfile:
             reader = csv.DictReader(csvfile, delimiter=",")
@@ -66,6 +71,6 @@
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--test-file", type=str, help="Add file to test.")
+    parser.add_argument("--test-files", nargs='+', help="Add files to test.")
    args = parser.parse_args()
-    post_ci_process([args.test_file])
+    post_ci_process(args.test_files)
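With the `--test-files` change, several result CSVs can be replayed through the post-processing step in one invocation, e.g. `python userbenchmark/torchao/upload.py --test-files a.csv b.csv`, and a missing file now emits a warning and is skipped instead of crashing the whole upload. An illustrative local replay (the CSV paths are hypothetical):

```python
# Illustrative replay of the post-processing entry point on local files;
# the paths below are hypothetical. Thanks to the new existence check,
# a nonexistent path only triggers a warning and is skipped.
from userbenchmark.torchao.upload import post_ci_process

post_ci_process([
    ".userbenchmark/torchao/torchao_noquant_timm_models_bfloat16_inference_cuda_performance.csv",
    ".userbenchmark/torchao/torchao_autoquant_timm_models_bfloat16_inference_cuda_accuracy.csv",
])
```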