diff --git a/examples/resnet/resnet_ptq_cpu.json b/examples/resnet/resnet_ptq_cpu.json index e66b5652fb..922882b08b 100644 --- a/examples/resnet/resnet_ptq_cpu.json +++ b/examples/resnet/resnet_ptq_cpu.json @@ -24,7 +24,7 @@ { "name": "accuracy_custom", "priority": 1, "higher_is_better": true, - "goal": {"type": "max-degradation", "value": 0.01} + "goal": {"type": "max-degradation", "value": 0.1} } ], "user_config":{ @@ -41,7 +41,7 @@ { "name": "avg", "priority": 2, - "goal": {"type": "percent-min-improvement", "value": 20} + "goal": {"type": "percent-min-improvement", "value": 10} } ], "user_config":{ diff --git a/examples/test/test_resnet_vitis_ai_ptq_cpu.py b/examples/test/test_resnet_vitis_ai_ptq_cpu.py index 8c7a0d3e56..fff11065eb 100644 --- a/examples/test/test_resnet_vitis_ai_ptq_cpu.py +++ b/examples/test/test_resnet_vitis_ai_ptq_cpu.py @@ -33,7 +33,7 @@ def setup(): @pytest.mark.parametrize("system", ["local_system", "aml_system"]) @pytest.mark.parametrize("olive_json", ["resnet_vitis_ai_ptq_cpu.json"]) @pytest.mark.skipif( - version.parse(OrtVersion) == version.parse("1.16.0"), + version.parse(OrtVersion) == version.parse("1.16.0") or version.parse(OrtVersion) == version.parse("1.16.1"), reason="VitisAIQuantization is not supported in ORT 1.16.0 with TensorsData", ) def test_resnet(search_algorithm, execution_order, system, olive_json): diff --git a/olive/passes/onnx/quant_pre_process.py b/olive/passes/onnx/quant_pre_process.py deleted file mode 100644 index f324c47deb..0000000000 --- a/olive/passes/onnx/quant_pre_process.py +++ /dev/null @@ -1,166 +0,0 @@ -# -------------------------------------------------------------------------- -# Copyright (c) Microsoft, Intel Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- - -import logging -import shutil -import tempfile -import traceback -from contextlib import contextmanager -from pathlib import Path -from typing import Optional - -import onnx -import onnxruntime -from onnxruntime.quantization.quant_utils import add_pre_process_metadata -from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference - -logger = logging.getLogger(__name__) - -# ruff: noqa: N802 - - -def quant_pre_process( - input_model_path: str, - output_model_path: str, - skip_optimization: bool = False, - skip_onnx_shape: bool = False, - skip_symbolic_shape: bool = False, - auto_merge: bool = False, - int_max: int = 2**31 - 1, - guess_output_rank: bool = False, - verbose: int = 0, - save_as_external_data: bool = False, - all_tensors_to_one_file: bool = False, - external_data_location: Optional[str] = None, - external_data_size_threshold: int = 1024, -) -> None: - """Shape inference and model optimization, in preparation for quantization. - - Args: - input_model_path: Path to the input model file") - output_model_path: Path to the output model file - skip_optimization: Skip model optimization step if true. This may result in ONNX shape - inference failure for some models. - skip_onnx_shape: Skip ONNX shape inference. Symbolic shape inference is most effective - with transformer based models. Skipping all shape inferences may - reduce the effectiveness of quantization, as a tensor with unknown - shape can not be quantized. - skip_symbolic_shape: Skip symbolic shape inference. Symbolic shape inference is most - effective with transformer based models. Skipping all shape - inferences may reduce the effectiveness of quantization, as a tensor - with unknown shape can not be quantized. - auto_merge: For symbolic shape inference, automatically merge symbolic dims when - conflict happens. - int_max: For symbolic shape inference, specify the maximum value for integer to be - treated as boundless for ops like slice - guess_output_rank: Guess output rank to be the same as input 0 for unknown ops - verbose: Logs detailed info of inference, 0: turn off, 1: warnings, 3: detailed - save_as_external_data: Saving an ONNX model to external data - all_tensors_to_one_file: Saving all the external data to one file - external_data_location: The file location to save the external file - external_data_size_threshold: The size threshold for external data - """ - with TemporaryDirectory(prefix="pre.quant.") as quant_tmp_dir: - temp_path = Path(quant_tmp_dir) - model = None - - if not skip_symbolic_shape: - logger.info("Performing symbolic shape inference...") - model = SymbolicShapeInference.infer_shapes( - onnx.load(input_model_path), - int_max, - auto_merge, - guess_output_rank, - verbose, - ) - - if not skip_optimization: - # Use ORT optimizers (native code) to optimize model - if not skip_symbolic_shape: - # Need to save the inferenced model to file so as to run the optimizer - input_model_path = str(temp_path / "symbolic_shape_inferred.onnx") - if save_as_external_data: - onnx.save_model( - model, - input_model_path, - save_as_external_data=True, - all_tensors_to_one_file=all_tensors_to_one_file, - size_threshold=external_data_size_threshold, - convert_attribute=False, - ) - else: - onnx.save(model, input_model_path) - model = None - - opt_model_path = str(temp_path / "optimized.onnx") - try: - sess_option = onnxruntime.SessionOptions() - sess_option.optimized_model_filepath = opt_model_path - sess_option.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_BASIC - _ = onnxruntime.InferenceSession(input_model_path, sess_option, providers=["CPUExecutionProvider"]) - except Exception: - logger.error( - "ONNX Runtime Model Optimization Failed! Consider rerun with option `--skip_optimization'." - ) - logger.error(traceback.format_exc()) - - input_model_path = opt_model_path - - if not skip_onnx_shape: - # ONNX shape inference. - # According to docs, infer_shapes_path should be used for 2G+ models. - # If the skip optimization is specified, we could be dealing with a - # large model. So be on the safe side, save the model - if model is not None: - input_model_path = str(temp_path / "symbolic_shape_inferred.onnx") - if save_as_external_data: - onnx.save_model( - model, - input_model_path, - save_as_external_data=True, - all_tensors_to_one_file=all_tensors_to_one_file, - size_threshold=external_data_size_threshold, - convert_attribute=False, - ) - else: - onnx.save(model, input_model_path) - model = None - - inferred_model_path = str(temp_path / "onnx_shape_inferred.onnx") - onnx.shape_inference.infer_shapes_path(input_model_path, inferred_model_path) - model = onnx.load(inferred_model_path) - - if model is None: - model = onnx.load(input_model_path) - - add_pre_process_metadata(model) - - if save_as_external_data: - onnx.save_model( - model, - output_model_path, - save_as_external_data=True, - all_tensors_to_one_file=all_tensors_to_one_file, - location=external_data_location, - size_threshold=external_data_size_threshold, - convert_attribute=False, - ) - else: - onnx.save(model, output_model_path) - - -@contextmanager -def TemporaryDirectory(**kwargs): - # TODO(myguo): this is a workaround for issue https://github.com/microsoft/onnxruntime/issues/17627 - # on Windows. - name = tempfile.mkdtemp(**kwargs) - try: - yield name - finally: - try: - shutil.rmtree(name) - except OSError: - logger.warning(f"Failed to remove: {name}", exc_info=True) diff --git a/olive/passes/onnx/quantization.py b/olive/passes/onnx/quantization.py index 256f3c6b85..4273f0b243 100644 --- a/olive/passes/onnx/quantization.py +++ b/olive/passes/onnx/quantization.py @@ -434,10 +434,9 @@ def _run_for_config( return model_proto_to_olive_model(onnx_model, output_model_path, config) def _quant_preprocess(self, model: ONNXModel, output_model_path: Union[str, Path]) -> ONNXModel: - from olive.passes.onnx.quant_pre_process import quant_pre_process + from onnxruntime.quantization.preprocess import quant_pre_process try: - # TODO(myguo): use ORT version once the Windows issue is fixed quant_pre_process( input_model_path=model.model_path, output_model_path=str(output_model_path), diff --git a/test/unit_test/passes/vitis_ai/test_vitis_ai_quantization.py b/test/unit_test/passes/vitis_ai/test_vitis_ai_quantization.py index 169ce43746..cbd34ab00a 100644 --- a/test/unit_test/passes/vitis_ai/test_vitis_ai_quantization.py +++ b/test/unit_test/passes/vitis_ai/test_vitis_ai_quantization.py @@ -37,7 +37,7 @@ def dummy_calibration_reader(data_dir=None, batch_size=1, *args, **kwargs): @pytest.mark.skipif( - version.parse(OrtVersion) == version.parse("1.16.0"), + version.parse(OrtVersion) == version.parse("1.16.0") or version.parse(OrtVersion) == version.parse("1.16.1"), reason="VitisAIQuantization is not supported in ORT 1.16.0 with TensorsData", ) def test_vitis_ai_quantization_pass(tmp_path):