Skip to content

[Bug]: Vitis quantization doesn't work with ORT 1.16. #629

@guotuofeng

Description

@guotuofeng

What happened?

The following error happens with the test case if ORT is 1.16.0. The root cause is that the calibrator API was changed by PR
microsoft/onnxruntime@d0316ee#diff-118260dbcc5091cfd805764c7a1a71be2727d89f118ae1bc9f6c31d5fa75c2a1

2023-09-19T23:04:00.2420563Z =================================== FAILURES ===================================
2023-09-19T23:04:00.2421087Z _______________________ test_vitis_ai_quantization_pass ________________________
2023-09-19T23:04:00.2421343Z 
2023-09-19T23:04:00.2433300Z tmp_path = PosixPath('/tmp/pytest-of-cloudtest/pytest-0/test_vitis_ai_quantization_pas0')
2023-09-19T23:04:00.2434332Z 
2023-09-19T23:04:00.2434920Z     def test_vitis_ai_quantization_pass(tmp_path):
2023-09-19T23:04:00.2435303Z         # setup
2023-09-19T23:04:00.2435642Z         input_model = get_onnx_model()
2023-09-19T23:04:00.2436074Z         dummy_user_script = str(tmp_path / "dummy_user_script.py")
2023-09-19T23:04:00.2436476Z         dummy_data = str(tmp_path / "dummy_data")
2023-09-19T23:04:00.2437015Z         with open(dummy_user_script, "w") as f:
2023-09-19T23:04:00.2437342Z             f.write(" ")
2023-09-19T23:04:00.2456174Z         if not os.path.exists(dummy_data):
2023-09-19T23:04:00.2457734Z             os.mkdir(dummy_data)
2023-09-19T23:04:00.2458345Z     
2023-09-19T23:04:00.2458831Z         config = {"user_script": dummy_user_script, "data_dir": dummy_data, "dataloader_func": dummy_calibration_reader}
2023-09-19T23:04:00.2459324Z         output_folder = str(tmp_path / "vitis_ai_quantized")
2023-09-19T23:04:00.2459701Z     
2023-09-19T23:04:00.2460062Z         # create VitisAIQuantization pass
2023-09-19T23:04:00.2460521Z         p = create_pass_from_dict(VitisAIQuantization, config, disable_search=True)
2023-09-19T23:04:00.2461028Z         # execute
2023-09-19T23:04:00.2461414Z >       quantized_model = p.run(input_model, None, output_folder)
2023-09-19T23:04:00.2461644Z 
2023-09-19T23:04:00.2462266Z test/unit_test/passes/vitis_ai/test_vitis_ai_quantization.py:53: 
2023-09-19T23:04:00.2462983Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
2023-09-19T23:04:00.2463480Z ***/passes/***_pass.py:400: in run
2023-09-19T23:04:00.2464201Z     output_model = self._run_for_config(model, data_root, config, output_model_path)
2023-09-19T23:04:00.2464637Z ***/passes/onnx/vitis_ai_quantization.py:347: in _run_for_config
2023-09-19T23:04:00.2464969Z     quantize_static(
2023-09-19T23:04:00.2465304Z ***/passes/onnx/vitis_ai/quantize.py:234: in quantize_static
2023-09-19T23:04:00.2465654Z     quantizer = VitisQDQQuantizer(
2023-09-19T23:04:00.2466173Z ***/passes/onnx/vitis_ai/quantizer.py:471: in __init__
2023-09-19T23:04:00.2466534Z     ONNXQuantizer.__init__(
2023-09-19T23:04:00.2467046Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
2023-09-19T23:04:00.2467253Z 
2023-09-19T23:04:00.2467583Z self = <***.passes.onnx.vitis_ai.quantizer.VitisQDQQuantizer object at 0x7fb2987f98b0>
2023-09-19T23:04:00.2467963Z model = ir_version: 5
2023-09-19T23:04:00.2468259Z producer_name: "pytorch"
2023-09-19T23:04:00.2468759Z producer_version: "2.0.0"
2023-09-19T23:04:00.2469229Z graph {
2023-09-19T23:04:00.2469775Z   node {
2023-09-19T23:04:00.2470043Z     input: "input"
2023-09-19T23:04:00.2470420Z     input: "fc1.w....infer"
2023-09-19T23:04:00.2470787Z   value: "onnxruntime.quant"
2023-09-19T23:04:00.2471100Z }
2023-09-19T23:04:00.2471683Z metadata_props {
2023-09-19T23:04:00.2472149Z   key: "onnx.quant.pre_process"
2023-09-19T23:04:00.2472604Z   value: "onnxruntime.quant"
2023-09-19T23:04:00.2473005Z }
2023-09-19T23:04:00.2473247Z 
2023-09-19T23:04:00.2473661Z per_channel = False, reduce_range = False
2023-09-19T23:04:00.2474079Z mode = <QuantizationMode.QLinearOps: 1>, static = True
2023-09-19T23:04:00.2474535Z weight_qType = <QuantType.QInt8: 0>, activation_qType = <QuantType.QUInt8: 1>
2023-09-19T23:04:00.2475314Z tensors_range = {'/fc1/Gemm_output_0': (-0.890625, 1.1015625), 'input': (0.0, 0.2490234375), 'output': (0.0, 0.99609375)}
2023-09-19T23:04:00.2475850Z nodes_to_quantize = None, nodes_to_exclude = None
2023-09-19T23:04:00.2476460Z op_types_to_quantize = ['Relu', 'Add', 'Clip', 'AveragePool', 'Transpose', 'ConvTranspose', ...]
2023-09-19T23:04:00.2477336Z extra_options = {'ActivationSymmetric': False, 'AddQDQPairToWeight': False, 'WeightSymmetric': True}
2023-09-19T23:04:00.2477907Z 
2023-09-19T23:04:00.2478172Z     def __init__(
2023-09-19T23:04:00.2478433Z         self,
2023-09-19T23:04:00.2478684Z         model,
2023-09-19T23:04:00.2478958Z         per_channel,
2023-09-19T23:04:00.2479462Z         reduce_range,
2023-09-19T23:04:00.2479725Z         mode,
2023-09-19T23:04:00.2480314Z         static,
2023-09-19T23:04:00.2480574Z         weight_qType,
2023-09-19T23:04:00.2480856Z         activation_qType,
2023-09-19T23:04:00.2481119Z         tensors_range,
2023-09-19T23:04:00.2481397Z         nodes_to_quantize,
2023-09-19T23:04:00.2481667Z         nodes_to_exclude,
2023-09-19T23:04:00.2481939Z         op_types_to_quantize,
2023-09-19T23:04:00.2482232Z         extra_options=None,
2023-09-19T23:04:00.2482488Z     ):
2023-09-19T23:04:00.2482774Z         if not model_has_infer_metadata(model):
2023-09-19T23:04:00.2483103Z             model = save_and_reload_model_with_shape_infer(model)
2023-09-19T23:04:00.2483452Z         self.value_infos = {vi.name: vi for vi in model.graph.value_info}
2023-09-19T23:04:00.2483830Z         self.value_infos.update({ot.name: ot for ot in model.graph.output})
2023-09-19T23:04:00.2484188Z         self.value_infos.update({it.name: it for it in model.graph.input})
2023-09-19T23:04:00.2484495Z     
2023-09-19T23:04:00.2484938Z         self.model = ONNXModel(model)
2023-09-19T23:04:00.2485259Z         if not static:
2023-09-19T23:04:00.2485560Z             self.model.replace_gemm_with_matmul()
2023-09-19T23:04:00.2485849Z     
2023-09-19T23:04:00.2486289Z         self.per_channel = per_channel  # weight-pack per channel
2023-09-19T23:04:00.2486648Z         self.reduce_range = reduce_range
2023-09-19T23:04:00.2486989Z         self.mode = mode  # QuantizationMode.Value
2023-09-19T23:04:00.2487336Z         self.static = static  # use static quantization for inputs.
2023-09-19T23:04:00.2487682Z         self.fuse_dynamic_quant = False
2023-09-19T23:04:00.2488114Z     
2023-09-19T23:04:00.2488617Z         self.extra_options = extra_options if extra_options else {}
2023-09-19T23:04:00.2488954Z         self.enable_subgraph_quantization = (
2023-09-19T23:04:00.2489297Z             "EnableSubgraph" in self.extra_options and self.extra_options["EnableSubgraph"]
2023-09-19T23:04:00.2489625Z         )
2023-09-19T23:04:00.2489895Z         self.force_quantize_no_input_check = (
2023-09-19T23:04:00.2490274Z             "ForceQuantizeNoInputCheck" in self.extra_options and self.extra_options["ForceQuantizeNoInputCheck"]
2023-09-19T23:04:00.2490625Z         )
2023-09-19T23:04:00.2490967Z         self.q_matmul_const_b_only = "MatMulConstBOnly" in self.extra_options and self.extra_options["MatMulConstBOnly"]
2023-09-19T23:04:00.2491338Z         self.is_weight_symmetric = (
2023-09-19T23:04:00.2491657Z             weight_qType in (QuantType.QInt8, QuantType.QFLOAT8E4M3FN)
2023-09-19T23:04:00.2492005Z             if "WeightSymmetric" not in self.extra_options
2023-09-19T23:04:00.2492320Z             else self.extra_options["WeightSymmetric"]
2023-09-19T23:04:00.2492610Z         )
2023-09-19T23:04:00.2492874Z         self.is_activation_symmetric = (
2023-09-19T23:04:00.2493234Z             False if "ActivationSymmetric" not in self.extra_options else self.extra_options["ActivationSymmetric"]
2023-09-19T23:04:00.2493577Z         )
2023-09-19T23:04:00.2493809Z     
2023-09-19T23:04:00.2494119Z         self.activation_qType = activation_qType.tensor_type
2023-09-19T23:04:00.2494447Z         self.weight_qType = weight_qType.tensor_type
2023-09-19T23:04:00.2494738Z         """
2023-09-19T23:04:00.2495058Z             Dictionary specifying the min and max values for tensors. It has following format:
2023-09-19T23:04:00.2495371Z                 {
2023-09-19T23:04:00.2495644Z                     "param_name": [min, max]
2023-09-19T23:04:00.2495902Z                 }
2023-09-19T23:04:00.2496161Z             example:
2023-09-19T23:04:00.2496403Z                 {
2023-09-19T23:04:00.2496810Z                     'Conv_3:0': [np.float32(0), np.float32(0.5)],
2023-09-19T23:04:00.2497277Z                     'Conv_4:0': [np.float32(1), np.float32(3.5)]
2023-09-19T23:04:00.2497971Z                 }
2023-09-19T23:04:00.2498396Z         """
2023-09-19T23:04:00.2498809Z         if tensors_range is not None and any(map(lambda t: not isinstance(t, TensorData), tensors_range.values())):
2023-09-19T23:04:00.2499723Z >           raise TypeError(
2023-09-19T23:04:00.2500181Z                 f"tensors_range contains unexpected types {set(type(v) for v in tensors_range.values())}, not TensorData."
2023-09-19T23:04:00.2500613Z             )
2023-09-19T23:04:00.2501417Z E           TypeError: tensors_range contains unexpected types {<class 'tuple'>}, not TensorData.
2023-09-19T23:04:00.2501659Z 
2023-09-19T23:04:00.2502160Z /opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/onnxruntime/quantization/onnx_quantizer.py:129: TypeError
2023-09-19T23:04:00.2502726Z ------------------------------ Captured log call -------------------------------
2023-09-19T23:04:00.2503230Z INFO     ***.passes.onnx.vitis_ai_quantization:vitis_ai_quantization.py:290 Preprocessing model for quantization

Version?

ORT 1.16.0

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug — Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions