From 2de61f62b12d14f95d840b5f7bba9b5189415f11 Mon Sep 17 00:00:00 2001 From: jainapurva Date: Sun, 16 Nov 2025 22:27:46 -0800 Subject: [PATCH 1/2] Test int8wo delay --- .github/workflows/run_microbenchmarks.yml | 2 +- .../dashboard/microbenchmark_quantization_config.yml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/run_microbenchmarks.yml b/.github/workflows/run_microbenchmarks.yml index 67f977d051..2cac203532 100644 --- a/.github/workflows/run_microbenchmarks.yml +++ b/.github/workflows/run_microbenchmarks.yml @@ -22,7 +22,7 @@ jobs: - name: Setup miniconda uses: pytorch/test-infra/.github/actions/setup-miniconda@main with: - python-version: "3.9" + python-version: "3.10" - name: Run benchmark shell: bash diff --git a/benchmarks/dashboard/microbenchmark_quantization_config.yml b/benchmarks/dashboard/microbenchmark_quantization_config.yml index 8156422668..7a32f5e43d 100644 --- a/benchmarks/dashboard/microbenchmark_quantization_config.yml +++ b/benchmarks/dashboard/microbenchmark_quantization_config.yml @@ -2,10 +2,10 @@ benchmark_mode: "inference" quantization_config_recipe_names: # Will run a baseline inference for model by default, without quantization for comparison - "int8wo" - - "int8dq" - - "float8dq-tensor" - - "float8dq-row" - - "float8wo" + # - "int8dq" + # - "float8dq-tensor" + # - "float8dq-row" + # - "float8wo" output_dir: "benchmarks/microbenchmarks/results" model_params: - name: "small_bf16_linear" From 588019e2943c12128eeee5c5126685b438dcc547 Mon Sep 17 00:00:00 2001 From: Apurva Jain Date: Mon, 17 Nov 2025 11:18:06 -0800 Subject: [PATCH 2/2] Update quantization config for small_bf16_linear model --- benchmarks/dashboard/microbenchmark_quantization_config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/dashboard/microbenchmark_quantization_config.yml b/benchmarks/dashboard/microbenchmark_quantization_config.yml index 7a32f5e43d..807a066f5f 100644 --- a/benchmarks/dashboard/microbenchmark_quantization_config.yml +++ b/benchmarks/dashboard/microbenchmark_quantization_config.yml @@ -12,7 +12,7 @@ model_params: matrix_shapes: - name: "small_sweep" min_power: 10 - max_power: 15 + max_power: 14 high_precision_dtype: "torch.bfloat16" torch_compile_mode: "max-autotune" device: "cuda"