diff --git a/.github/workflows/run_microbenchmarks.yml b/.github/workflows/run_microbenchmarks.yml
index 67f977d051..2cac203532 100644
--- a/.github/workflows/run_microbenchmarks.yml
+++ b/.github/workflows/run_microbenchmarks.yml
@@ -22,7 +22,7 @@ jobs:
       - name: Setup miniconda
         uses: pytorch/test-infra/.github/actions/setup-miniconda@main
         with:
-          python-version: "3.9"
+          python-version: "3.10"
 
       - name: Run benchmark
         shell: bash
diff --git a/benchmarks/dashboard/microbenchmark_quantization_config.yml b/benchmarks/dashboard/microbenchmark_quantization_config.yml
index 8156422668..807a066f5f 100644
--- a/benchmarks/dashboard/microbenchmark_quantization_config.yml
+++ b/benchmarks/dashboard/microbenchmark_quantization_config.yml
@@ -2,17 +2,17 @@ benchmark_mode: "inference"
 quantization_config_recipe_names:
   # Will run a baseline inference for model by default, without quantization for comparison
   - "int8wo"
-  - "int8dq"
-  - "float8dq-tensor"
-  - "float8dq-row"
-  - "float8wo"
+  # - "int8dq"
+  # - "float8dq-tensor"
+  # - "float8dq-row"
+  # - "float8wo"
 output_dir: "benchmarks/microbenchmarks/results"
 model_params:
   - name: "small_bf16_linear"
     matrix_shapes:
       - name: "small_sweep"
         min_power: 10
-        max_power: 15
+        max_power: 14
     high_precision_dtype: "torch.bfloat16"
     torch_compile_mode: "max-autotune"
     device: "cuda"