diff --git a/onnxruntime/core/optimizer/nhwc_transformer.cc b/onnxruntime/core/optimizer/nhwc_transformer.cc index 67952b0e039ef..47732e4af4b89 100644 --- a/onnxruntime/core/optimizer/nhwc_transformer.cc +++ b/onnxruntime/core/optimizer/nhwc_transformer.cc @@ -50,6 +50,7 @@ class NhwcTransformerImpl { void TransformQLinearActivation(Node& node); void TransformQLinearGlobalAveragePool(Node& node); void TransformSplit(Node& node); + void TransformPad(Node& node); Graph& graph_; @@ -282,6 +283,49 @@ void NhwcTransformerImpl::TransformSplit(Node& node) { CreateNhwcArgument(node, node, nhwc_input->rank_); } +void NhwcTransformerImpl::TransformPad(Node& node) { + auto& input_defs = node.MutableInputDefs(); + + auto* nhwc_input = LookupNhwcArgument(input_defs[0]); + if (nhwc_input == nullptr) { + return; + } + + const ONNX_NAMESPACE::TensorProto* pads_tensor_proto = nullptr; + if (!graph_utils::NodeArgIsConstant(graph_, *input_defs[1]) || + !graph_.GetInitializedTensor(input_defs[1]->Name(), pads_tensor_proto) || + (pads_tensor_proto->dims_size() != 1) || + (pads_tensor_proto->dims(0) != nhwc_input->rank_ * 2) || + (nhwc_input->rank_ <= 2)) { // nc only, no any hw axises + return; + } + + // perm nchw to nhwc on pad tensor + Initializer pads_initializer{*pads_tensor_proto, graph_.ModelPath()}; + const int64_t* nchw_pads_data = pads_initializer.data(); + size_t n_dim = static_cast(pads_tensor_proto->dims(0)) / 2; + std::vector nhwc_pads(nchw_pads_data, nchw_pads_data + pads_tensor_proto->dims(0)); + std::copy_n(nchw_pads_data + 2, n_dim - 2, nhwc_pads.data() + 1); + std::copy_n(nchw_pads_data + 2 + n_dim, n_dim - 2, nhwc_pads.data() + 1 + n_dim); + nhwc_pads[n_dim - 1] = nchw_pads_data[1]; + nhwc_pads[2 * n_dim - 1] = nchw_pads_data[n_dim + 1]; + + ONNX_NAMESPACE::TensorProto nhwc_pads_tensor_proto; + nhwc_pads_tensor_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + nhwc_pads_tensor_proto.set_name(graph_.GenerateNodeArgName("nhwc_permutated_pads")); + nhwc_pads_tensor_proto.set_raw_data(nhwc_pads.data(), n_dim * 2 * sizeof(int64_t)); + nhwc_pads_tensor_proto.add_dims(n_dim * 2); + NodeArg* nhwc_pads_arg = &graph_utils::AddInitializer(graph_, nhwc_pads_tensor_proto); + + // Update the node to directly use the NHWC inputs and decrement the original + // use counts of the NHWC inputs. + input_defs[1] = nhwc_pads_arg; + input_defs[0] = nhwc_input->nhwc_arg_; + nhwc_input->remaining_original_uses_--; + + CreateNhwcArgument(node, node, nhwc_input->rank_); +} + void NhwcTransformerImpl::Transform(Node& node) { if (graph_utils::IsSupportedOptypeVersionAndDomain(node, "QLinearConv", {10})) { TransformQLinearConv(node); @@ -295,6 +339,8 @@ void NhwcTransformerImpl::Transform(Node& node) { TransformQLinearGlobalAveragePool(node); } else if (graph_utils::IsSupportedOptypeVersionAndDomain(node, "Split", {2, 11, 13})) { TransformSplit(node); + } else if (graph_utils::IsSupportedOptypeVersionAndDomain(node, "Pad", {11, 13})) { + TransformPad(node); } } diff --git a/onnxruntime/core/providers/cpu/tensor/pad.cc b/onnxruntime/core/providers/cpu/tensor/pad.cc index e12852a29641c..79db40e14692d 100644 --- a/onnxruntime/core/providers/cpu/tensor/pad.cc +++ b/onnxruntime/core/providers/cpu/tensor/pad.cc @@ -322,8 +322,15 @@ static Status PadImpl(OpKernelContext* ctx, int64_t prePad = reshaped_pad[inner_axis]; int64_t postPad = reshaped_pad[inner_axis + new_dims_count]; - PadAxisConstant(axisStart - prePad, *axisStart, prePad); - PadAxisConstant(output, *(output - 1), postPad); + if (inner_no_pad_size == 1) { + PadAxisConstant(axisStart - prePad, *axisStart, prePad); + PadAxisConstant(output, *(output - 1), postPad); + } else { + // When inner_most axis(es) do not need pad, above PadAxisConstant() do not fit for Edge mode. + // Also general loop below after handling first pad axis with non-pad axis works fine. + PadAxis(axisStart - prePad, axisStart, 1, -ptrdiff_t(inner_no_pad_size), inner_no_pad_size, pads[inner_axis]); + PadAxis(output, output - inner_no_pad_size, 1, -ptrdiff_t(inner_no_pad_size), inner_no_pad_size, pads[inner_axis + data_rank]); + } output += postPad; alignSkip = prePad; } @@ -353,8 +360,14 @@ static Status PadImpl(OpKernelContext* ctx, int64_t prePad = reshaped_pad[inner_axis]; int64_t postPad = reshaped_pad[inner_axis + new_dims_count]; - PadInnermostAxis(axisStart - prePad, axisStart + prePad, -1 /* inputDelta */, prePad); - PadInnermostAxis(output, output - 2, -1 /* inputDelta */, postPad); + if (inner_no_pad_size == 1) { + PadInnermostAxis(axisStart - prePad, axisStart + prePad, -1 /* inputDelta */, prePad); + PadInnermostAxis(output, output - 2, -1 /* inputDelta */, postPad); + } else { + // When inner_most axis(es) do not need pad, Above PadInnermostAxis() do not fit for Reflect mode. + PadAxis(axisStart - prePad, axisStart + prePad, 1, -ptrdiff_t(inner_no_pad_size * 2), inner_no_pad_size, pads[inner_axis]); + PadAxis(output, output - 2 * inner_no_pad_size, 1, -ptrdiff_t(inner_no_pad_size * 2), inner_no_pad_size, pads[inner_axis + data_rank]); + } output += postPad; alignSkip = prePad; } diff --git a/onnxruntime/python/tools/quantization/calibrate.py b/onnxruntime/python/tools/quantization/calibrate.py index 24610903520ab..bdd4d44063a72 100644 --- a/onnxruntime/python/tools/quantization/calibrate.py +++ b/onnxruntime/python/tools/quantization/calibrate.py @@ -139,14 +139,14 @@ def get_intermediate_outputs(self, calib_mode='naive', providers=None, ort_graph ''' #conduct inference session and get intermediate outputs + sess_options = onnxruntime.SessionOptions() if ort_graph_optimization_enable: - session = onnxruntime.InferenceSession(self.augmented_model_path, None) + sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_BASIC else: - sess_options = onnxruntime.SessionOptions() - sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL #ORT_ENABLE_BASIC - session = onnxruntime.InferenceSession(self.augmented_model_path, - sess_options=sess_options, - providers=providers) + sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL + session = onnxruntime.InferenceSession(self.augmented_model_path, + sess_options=sess_options, + providers=providers) #number of outputs in original model num_model_outputs = len(self.model.graph.output) diff --git a/onnxruntime/python/tools/quantization/onnx_quantizer.py b/onnxruntime/python/tools/quantization/onnx_quantizer.py index 48d729a9239d3..5f6946261a7df 100644 --- a/onnxruntime/python/tools/quantization/onnx_quantizer.py +++ b/onnxruntime/python/tools/quantization/onnx_quantizer.py @@ -15,7 +15,7 @@ from onnxruntime import SessionOptions, InferenceSession, GraphOptimizationLevel from .quant_utils import QuantizationMode, QuantizedValueType, QuantizedInitializer, QuantizedValue, quantization_modes -from .quant_utils import find_by_name, get_elem_index, get_mul_node, generate_identified_filename, attribute_to_kwarg, type_to_name +from .quant_utils import find_by_name, get_elem_index, get_mul_node, generate_identified_filename, attribute_to_kwarg, type_to_name, quantize_nparray from .quant_utils import QuantType, onnx_domain, __producer__, __version__ from .registry import CreateOpQuantizer, CreateDefaultOpQuantizer @@ -48,11 +48,11 @@ def quantize_data(data, quantize_range, qType): scale = (float(max_range) * 2) / quantize_range if max_range > 0 else 1 zero_point = 0 # signed byte type - quantized_data = (np.asarray(data) / scale).round().astype('b') + quantized_data = quantize_nparray(QuantType.QInt8, np.asarray(data), scale, zero_point) elif qType == onnx_proto.TensorProto.UINT8: scale = (float(rmax) - rmin) / quantize_range if rmin != rmax else 1 zero_point = round((0 - rmin) / scale) # round to nearest integer - quantized_data = ((np.asarray(data) / scale).round() + zero_point).astype('B') # unsigned byte type + quantized_data = quantize_nparray(QuantType.QUInt8, np.asarray(data), scale, zero_point) else: raise ValueError("Unexpected data type {} requested. Only INT8 and UINT8 are supported.".format(qType)) @@ -500,7 +500,7 @@ def _get_quantization_params(self, param_name): return True, scale_name, zero_point_name, scale_shape, zero_point_shape - def _get_quantize_input_nodes(self, node, input_index, qType): + def _get_quantize_input_nodes(self, node, input_index, qType, given_scale_name = None, given_zp_name = None): ''' Given an input for a node (which is not a initializer), this function - add nodes to compute zero point and scale for this input if they don't exist. @@ -508,13 +508,17 @@ def _get_quantize_input_nodes(self, node, input_index, qType): parameter node: node being quantized in NodeProto format. parameter input_index: index of input in node.input. parameter qType: type to quantize to. + parameter given_scale_name: if those inputs need to be quanitzed using this scale tensor. + parameter given_zp_name: if those inputs to be quantized using this zeropoint tensor. return: List of newly created nodes in NodeProto format. ''' input_name = node.input[input_index] output_name = input_name + "_quantized" - data_found, scale_name, zp_name, _, _ = \ - self._get_quantization_params(input_name) + if (given_scale_name is not None) and (given_zp_name is not None): + data_found, scale_name, zp_name = (True, given_scale_name, given_zp_name) + else: + data_found, scale_name, zp_name, _, _ = self._get_quantization_params(input_name) if self.static: if data_found == False: diff --git a/onnxruntime/python/tools/quantization/operators/pad.py b/onnxruntime/python/tools/quantization/operators/pad.py new file mode 100644 index 0000000000000..d6ca1392c8327 --- /dev/null +++ b/onnxruntime/python/tools/quantization/operators/pad.py @@ -0,0 +1,68 @@ +import onnx +import numpy as np +from .base_operator import QuantOperatorBase +from ..quant_utils import QuantizedValue, QuantizedValueType, attribute_to_kwarg, quantize_nparray + + +class QPad(QuantOperatorBase): + def __init__(self, onnx_quantizer, onnx_node): + super().__init__(onnx_quantizer, onnx_node) + + def quantize(self): + node = self.node + assert (node.op_type == "Pad") + + # Only after version 11, it has the optional constant_value + # If input[0] is not quantized, do not quanitize this node + if (self.quantizer.opset_version < 11) or (node.input[0] not in self.quantizer.quantized_value_map): + super().quantize() + return + quantized_input_value = self.quantizer.quantized_value_map[node.input[0]] + + kwargs = {} + for attribute in node.attribute: + kv = attribute_to_kwarg(attribute) + kwargs.update(kv) + + if 'mode' not in kwargs or kwargs['mode'] == b'constant': + if len(node.input) > 2: # There is 3rd input 'constant_value' + zp_tensor = self.quantizer.model.get_initializer(quantized_input_value.zp_name) + scale_tensor = self.quantizer.model.get_initializer(quantized_input_value.scale_name) + if zp_tensor is None or scale_tensor is None: + super().quantize() + return + + padding_constant_initializer = self.quantizer.model.get_initializer(node.input[2]) + if padding_constant_initializer is not None: + zp_array = onnx.numpy_helper.to_array(zp_tensor) + zp_value = zp_array.item() if zp_array.ndim == 0 else zp_array[0] + scale_array = onnx.numpy_helper.to_array(scale_tensor) + scale_value = scale_array.item() if scale_array.ndim == 0 else scale_array[0] + padding_constant_array = onnx.numpy_helper.to_array(padding_constant_initializer) + quantized_padding_constant_array = quantize_nparray( + quantized_input_value.qType, padding_constant_array, scale_value, zp_value) + quantized_padding_constant_name = node.input[2] + "_quantized" + quantized_padding_constant_initializer = onnx.numpy_helper.from_array( + quantized_padding_constant_array, quantized_padding_constant_name) + # Suppose this padding constant initializer only used by the node + self.quantizer.model.remove_initializer(padding_constant_initializer) + self.quantizer.model.add_initializer(quantized_padding_constant_initializer) + node.input[2] = quantized_padding_constant_name + else: + pad_value_qnodes = self.quantizer._get_quantize_input_nodes( + node, 2, quantized_input_value.qType, + quantized_input_value.scale_name, quantized_input_value.zp_name) + self.quantizer.new_nodes += [pad_value_qnodes] + node.input[2] = pad_value_qnodes.output[0] + else: + node.input.extend([quantized_input_value.zp_name]) # pad zero_point for original zero + + # Create an entry for output quantized value + quantized_output_value = QuantizedValue(node.output[0], node.output[0] + "_quantized", + quantized_input_value.scale_name, quantized_input_value.zp_name, + QuantizedValueType.Input) + self.quantizer.quantized_value_map[node.output[0]] = quantized_output_value + + node.input[0] = quantized_input_value.q_name + node.output[0] = quantized_output_value.q_name + self.quantizer.new_nodes += [node] diff --git a/onnxruntime/python/tools/quantization/quant_utils.py b/onnxruntime/python/tools/quantization/quant_utils.py index 2b3e430492a4f..c4ed4a1a6ced6 100644 --- a/onnxruntime/python/tools/quantization/quant_utils.py +++ b/onnxruntime/python/tools/quantization/quant_utils.py @@ -60,6 +60,15 @@ class QuantType(Enum): } +def quantize_nparray(qtype, arr, scale, zero_point, low = None, high = None): + dtype = QUANT_TYPE_TO_NP_TYPE[qtype] + cliplow = max(0 if dtype == numpy.uint8 else -127, -127 if low is None else low) + cliphigh = min(255 if dtype == numpy.uint8 else 127, 255 if high is None else high) + arr_fp32 = numpy.asarray((arr.astype(numpy.float32) / scale).round() + zero_point) + numpy.clip(arr_fp32, cliplow, cliphigh, out=arr_fp32) + return arr_fp32.astype(dtype) + + class QuantizedInitializer: ''' Represents a linearly quantized weight input from ONNX operators diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py index 0ab33442d2f5e..92b8ace220faf 100644 --- a/onnxruntime/python/tools/quantization/quantize.py +++ b/onnxruntime/python/tools/quantization/quantize.py @@ -35,7 +35,7 @@ def optimize_model(model_path: Path): sess_option = SessionOptions() sess_option.optimized_model_filepath = opt_model_path.as_posix() sess_option.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_BASIC - _ = InferenceSession(model_path.as_posix(), sess_option) + _ = InferenceSession(model_path.as_posix(), sess_option, providers=['CPUExecutionProvider']) optimized_model = onnx.load(opt_model_path.as_posix()) return optimized_model diff --git a/onnxruntime/python/tools/quantization/registry.py b/onnxruntime/python/tools/quantization/registry.py index 1503c69faa97c..16ce66467f06a 100644 --- a/onnxruntime/python/tools/quantization/registry.py +++ b/onnxruntime/python/tools/quantization/registry.py @@ -11,6 +11,7 @@ from .operators.gavgpool import QGlobalAveragePool from .operators.lstm import LSTMQuant from .operators.split import QSplit +from .operators.pad import QPad CommonOpsRegistry = {"Gather": GatherQuant, "EmbedLayerNormalization": EmbedLayerNormalizationQuant} @@ -33,7 +34,8 @@ "Sigmoid": QLinearActivation, "MaxPool": QMaxPool, "GlobalAveragePool": QGlobalAveragePool, - "Split": QSplit, + "Split" : QSplit, + "Pad" : QPad, } QLinearOpsRegistry.update(CommonOpsRegistry) diff --git a/onnxruntime/test/optimizer/nhwc_transformer_test.cc b/onnxruntime/test/optimizer/nhwc_transformer_test.cc index 8960a526d7112..60d5eebb61d10 100644 --- a/onnxruntime/test/optimizer/nhwc_transformer_test.cc +++ b/onnxruntime/test/optimizer/nhwc_transformer_test.cc @@ -455,6 +455,40 @@ TEST(NhwcTransformerTests, ConvSplit) { } } +TEST(NhwcTransformerTests, ConvPad) { + std::vector pad_modes = {"constant", "reflect", "edge"}; + for (const auto& mode : pad_modes) { + auto build_test_case = [&](NhwcTestHelper& helper) { + auto* input_arg = helper.MakeInput({1, 23, 13, 13}); + auto* conv1_output_arg = helper.MakeIntermediate(); + auto* pads_const = helper.MakeScalarInitializer(131); + auto* pads_arg = helper.Make1DInitializer({0, 0, 1, 2, 0, 0, 3, 4}); + auto* pad_output_arg = helper.MakeIntermediate(); + auto* conv2_output_arg = helper.MakeIntermediate(); + auto* output_arg = helper.MakeOutput(); + + Node& conv1_node = helper.AddQLinearConvNode(input_arg, .01f, 135, + {30, 23, 3, 3}, .02f, 126, + conv1_output_arg, .37f, 131); + conv1_node.AddAttribute("pads", std::vector{1, 1, 1, 1}); + Node& pad_node = helper.AddNode("Pad", {conv1_output_arg, pads_arg, pads_const}, {pad_output_arg}); + pad_node.AddAttribute("mode", mode); + helper.AddQLinearConvNode(pad_output_arg, .37f, 131, + {16, 30, 3, 3}, .015f, 129, + conv2_output_arg, .37f, 131); + helper.AddDequantizeLinearNode(conv2_output_arg, .37f, 131, output_arg); + }; + + auto check_nhwc_graph = [&](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + EXPECT_EQ(op_to_count["com.microsoft.QLinearConv"], 2); + EXPECT_EQ(op_to_count["Transpose"], 2); + }; + + NhwcTransformerTester(build_test_case, check_nhwc_graph); + } +} + TEST(NhwcTransformerTests, ConvBlockActivation) { auto test_case = [&](uint32_t extra_edges) { auto build_test_case = [&](NhwcTestHelper& helper) { diff --git a/onnxruntime/test/providers/cpu/tensor/pad_test.cc b/onnxruntime/test/providers/cpu/tensor/pad_test.cc index 86ea66ed84f71..d18dbe7095c1d 100644 --- a/onnxruntime/test/providers/cpu/tensor/pad_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/pad_test.cc @@ -360,6 +360,233 @@ TYPED_TEST(PadOpTest, Pad_Reflect_2D) { "reflect"); } +TYPED_TEST(PadOpTest, Pad_Constant_3D_Inner_No_Padding) { + using T = TypeParam; + RunAllOpsetAllDomainPadTests({3, 2, 5}, + {T(1), T(2), T(3), T(4), T(5), + T(6), T(7), T(8), T(9), T(10), + T(11), T(12), T(13), T(14), T(15), + T(16), T(17), T(18), T(19), T(20), + T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30)}, + {1, 1, 0, 1, 1, 0}, + T(31), + {5, 4, 5}, + {T(31), T(31), T(31), T(31), T(31), + T(31), T(31), T(31), T(31), T(31), + T(31), T(31), T(31), T(31), T(31), + T(31), T(31), T(31), T(31), T(31), + + T(31), T(31), T(31), T(31), T(31), + T(1), T(2), T(3), T(4), T(5), + T(6), T(7), T(8), T(9), T(10), + T(31), T(31), T(31), T(31), T(31), + + T(31), T(31), T(31), T(31), T(31), + T(11), T(12), T(13), T(14), T(15), + T(16), T(17), T(18), T(19), T(20), + T(31), T(31), T(31), T(31), T(31), + + T(31), T(31), T(31), T(31), T(31), + T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30), + T(31), T(31), T(31), T(31), T(31), + + T(31), T(31), T(31), T(31), T(31), + T(31), T(31), T(31), T(31), T(31), + T(31), T(31), T(31), T(31), T(31), + T(31), T(31), T(31), T(31), T(31)}, + "constant"); +} + +TYPED_TEST(PadOpTest, Pad_Edge_3D_Inner_No_Padding) { + using T = TypeParam; + RunAllOpsetAllDomainPadTests({3, 2, 5}, + {T(1), T(2), T(3), T(4), T(5), + T(6), T(7), T(8), T(9), T(10), + T(11), T(12), T(13), T(14), T(15), + T(16), T(17), T(18), T(19), T(20), + T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30)}, + {1, 1, 0, 1, 1, 0}, + T(0), + {5, 4, 5}, + {T(1), T(2), T(3), T(4), T(5), + T(1), T(2), T(3), T(4), T(5), + T(6), T(7), T(8), T(9), T(10), + T(6), T(7), T(8), T(9), T(10), + + T(1), T(2), T(3), T(4), T(5), + T(1), T(2), T(3), T(4), T(5), + T(6), T(7), T(8), T(9), T(10), + T(6), T(7), T(8), T(9), T(10), + + T(11), T(12), T(13), T(14), T(15), + T(11), T(12), T(13), T(14), T(15), + T(16), T(17), T(18), T(19), T(20), + T(16), T(17), T(18), T(19), T(20), + + T(21), T(22), T(23), T(24), T(25), + T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30), + T(26), T(27), T(28), T(29), T(30), + + T(21), T(22), T(23), T(24), T(25), + T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30), + T(26), T(27), T(28), T(29), T(30)}, + "edge"); +} + +TYPED_TEST(PadOpTest, Pad_Edge_3D_Last_Pad_Slice_Inner_No_Padding) { + using T = TypeParam; + RunAllOpsetAllDomainPadTests({3, 2, 5}, + {T(1), T(2), T(3), T(4), T(5), + T(6), T(7), T(8), T(9), T(10), + T(11), T(12), T(13), T(14), T(15), + T(16), T(17), T(18), T(19), T(20), + T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30)}, + {1, -1, 0, 1, 1, 0}, + T(0), + {5, 2, 5}, + {T(6), T(7), T(8), T(9), T(10), + T(6), T(7), T(8), T(9), T(10), + + T(6), T(7), T(8), T(9), T(10), + T(6), T(7), T(8), T(9), T(10), + + T(16), T(17), T(18), T(19), T(20), + T(16), T(17), T(18), T(19), T(20), + + T(26), T(27), T(28), T(29), T(30), + T(26), T(27), T(28), T(29), T(30), + + T(26), T(27), T(28), T(29), T(30), + T(26), T(27), T(28), T(29), T(30)}, + "edge"); +} + +TYPED_TEST(PadOpTest, Pad_Edge_3D_Last_Slice_Inner_No_Padding) { + using T = TypeParam; + RunAllOpsetAllDomainPadTests({2, 3, 5}, + {T(1), T(2), T(3), T(4), T(5), + T(6), T(7), T(8), T(9), T(10), + T(11), T(12), T(13), T(14), T(15), + T(16), T(17), T(18), T(19), T(20), + T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30)}, + {1, -1, 0, 1, 0, 0}, + T(0), + {4, 2, 5}, + {T(6), T(7), T(8), T(9), T(10), + T(11), T(12), T(13), T(14), T(15), + + T(6), T(7), T(8), T(9), T(10), + T(11), T(12), T(13), T(14), T(15), + + T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30), + + T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30)}, + "edge"); +} + +TYPED_TEST(PadOpTest, Pad_Reflect_3D_Inner_No_Padding) { + using T = TypeParam; + RunAllOpsetAllDomainPadTests({3, 2, 5}, + {T(1), T(2), T(3), T(4), T(5), + T(6), T(7), T(8), T(9), T(10), + T(11), T(12), T(13), T(14), T(15), + T(16), T(17), T(18), T(19), T(20), + T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30)}, + {1, 1, 0, 1, 1, 0}, + T(0), + {5, 4, 5}, + {T(16), T(17), T(18), T(19), T(20), + T(11), T(12), T(13), T(14), T(15), + T(16), T(17), T(18), T(19), T(20), + T(11), T(12), T(13), T(14), T(15), + + T(6), T(7), T(8), T(9), T(10), + T(1), T(2), T(3), T(4), T(5), + T(6), T(7), T(8), T(9), T(10), + T(1), T(2), T(3), T(4), T(5), + + T(16), T(17), T(18), T(19), T(20), + T(11), T(12), T(13), T(14), T(15), + T(16), T(17), T(18), T(19), T(20), + T(11), T(12), T(13), T(14), T(15), + + T(26), T(27), T(28), T(29), T(30), + T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30), + T(21), T(22), T(23), T(24), T(25), + + T(16), T(17), T(18), T(19), T(20), + T(11), T(12), T(13), T(14), T(15), + T(16), T(17), T(18), T(19), T(20), + T(11), T(12), T(13), T(14), T(15)}, + "reflect"); +} + +TYPED_TEST(PadOpTest, Pad_Reflect_3D_Last_Pad_Slice_Inner_No_Padding) { + using T = TypeParam; + RunAllOpsetAllDomainPadTests({2, 3, 5}, + {T(1), T(2), T(3), T(4), T(5), + T(6), T(7), T(8), T(9), T(10), + T(11), T(12), T(13), T(14), T(15), + T(16), T(17), T(18), T(19), T(20), + T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30)}, + {1, -1, 0, 1, 1, 0}, + T(0), + {4, 3, 5}, + {T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30), + T(21), T(22), T(23), T(24), T(25), + + T(6), T(7), T(8), T(9), T(10), + T(11), T(12), T(13), T(14), T(15), + T(6), T(7), T(8), T(9), T(10), + + T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30), + T(21), T(22), T(23), T(24), T(25), + + T(6), T(7), T(8), T(9), T(10), + T(11), T(12), T(13), T(14), T(15), + T(6), T(7), T(8), T(9), T(10)}, + "reflect"); +} + +TYPED_TEST(PadOpTest, Pad_Reflect_3D_Last_Slice_Inner_No_Padding) { + using T = TypeParam; + RunAllOpsetAllDomainPadTests({2, 3, 5}, + {T(1), T(2), T(3), T(4), T(5), + T(6), T(7), T(8), T(9), T(10), + T(11), T(12), T(13), T(14), T(15), + T(16), T(17), T(18), T(19), T(20), + T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30)}, + {1, -1, 0, 1, 0, 0}, + T(0), + {4, 2, 5}, + {T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30), + + T(6), T(7), T(8), T(9), T(10), + T(11), T(12), T(13), T(14), T(15), + + T(21), T(22), T(23), T(24), T(25), + T(26), T(27), T(28), T(29), T(30), + + T(6), T(7), T(8), T(9), T(10), + T(11), T(12), T(13), T(14), T(15)}, + "reflect"); +} /* Example numpy for testing behavior diff --git a/onnxruntime/test/python/quantization/op_test_utils.py b/onnxruntime/test/python/quantization/op_test_utils.py new file mode 100644 index 0000000000000..31c2a32718592 --- /dev/null +++ b/onnxruntime/test/python/quantization/op_test_utils.py @@ -0,0 +1,46 @@ + +import onnx +import numpy as np +import onnxruntime +from pathlib import Path +from onnxruntime.quantization import CalibrationDataReader + + +class TestDataFeeds(CalibrationDataReader): + def __init__(self, data_feeds): + ''' + parameter data_feeds: list of input feed, each input feed is diction of {input_name: np_array} + ''' + self.data_feeds = data_feeds + self.iter_next = iter(self.data_feeds) + + def get_next(self): + return next(self.iter_next, None) + + def rewind(self): + self.iter_next = iter(self.data_feeds) + + +def check_op_type_count(testcase, model_path, **kwargs): + model = onnx.load(Path(model_path)) + optype2count = {} + for op_type in kwargs: + optype2count[op_type] = 0 + for node in model.graph.node: + if node.op_type in optype2count: + optype2count[node.op_type] += 1 + for op_type in kwargs: + testcase.assertEqual(kwargs[op_type], optype2count[op_type], 'op_type {} count not same'.format(op_type)) + + +def check_model_correctness(testcase, model_path_origin, model_path_to_check, inputs, rtol=1e-2, atol=0.05): + sess_options = onnxruntime.SessionOptions() + sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL + origin_sess = onnxruntime.InferenceSession(model_path_origin, sess_options=sess_options, providers=["CPUExecutionProvider"]) + origin_results = origin_sess.run([], inputs) + target_sess = onnxruntime.InferenceSession(model_path_to_check, sess_options=sess_options,providers=["CPUExecutionProvider"]) + target_results = target_sess.run([], inputs) + testcase.assertEqual(len(origin_results), len(target_results), 'result count are different') + for idx, ref_output in enumerate(origin_results): + output = target_results[idx] + np.testing.assert_allclose(ref_output, output, rtol=rtol, atol=atol) diff --git a/onnxruntime/test/python/quantization/test_op_pad.py b/onnxruntime/test/python/quantization/test_op_pad.py new file mode 100644 index 0000000000000..3bbc8fc38f272 --- /dev/null +++ b/onnxruntime/test/python/quantization/test_op_pad.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python +# coding: utf-8 +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- + +import unittest +import onnx +import numpy as np +from onnx import helper, TensorProto +from onnxruntime.quantization import quantize_static, quantize_dynamic +from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count + + +class TestOpQuatizerPad(unittest.TestCase): + def input_feeds(self, n, name2shape): + input_data_list = [] + for i in range(n): + inputs = {} + for name, shape in name2shape.items(): + inputs.update({name: np.random.randint(-1, 2, shape).astype(np.float32)}) + input_data_list.extend([inputs]) + dr = TestDataFeeds(input_data_list) + return dr + + def construct_model_pad(self, output_model_path, pad_mode, pad_input_shape, pad_dims, constant_value=None): + # (input) + # | + # Pad + # | + # (output) + rank = len(pad_input_shape) + self.assertEqual(rank * 2, len(pad_dims)) + + input_tensor = helper.make_tensor_value_info('input', TensorProto.FLOAT, pad_input_shape) + pad_dims_initializer = helper.make_tensor('pad_dims', TensorProto.INT64, [2 * rank], pad_dims) + output_shape = [sum(e) for e in list(zip(pad_input_shape, pad_dims[:rank], pad_dims[rank:]))] + output_tensor = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + inputs = ['input', 'pad_dims'] + initializers = [pad_dims_initializer] + if (constant_value is not None) and (pad_mode is None or pad_mode == 'constant'): + constant_value_tensor = helper.make_tensor('padding_value', TensorProto.FLOAT, [], [constant_value]) + inputs.extend(['padding_value']) + initializers.extend([constant_value_tensor]) + kwargs = {'mode': pad_mode} if pad_mode is not None else {} + pad_node = helper.make_node('Pad', inputs, ['output'], name='PadNode', **kwargs) + + graph = helper.make_graph([pad_node], 'TestOpQuantizerPad_test_model', + [input_tensor], [output_tensor], initializer=initializers) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + model.ir_version = onnx.IR_VERSION + + onnx.save(model, output_model_path) + + def construct_model_conv_pad(self, output_model_path, conv_input_shape, conv_weight_shape, + pad_input_shape, pad_mode, pad_dims, constant_value=None): + # (input) + # \ + # Conv + # / \ + # Identity Pad + # / \ + # (identity_out) (output) + rank = len(pad_input_shape) + self.assertEqual(rank * 2, len(pad_dims)) + + input_tensor = helper.make_tensor_value_info('input', TensorProto.FLOAT, conv_input_shape) + + conv_weight_arr = np.random.randint(-1, 2, conv_weight_shape).astype(np.float32) + conv_weight_initializer = onnx.numpy_helper.from_array(conv_weight_arr, name='conv1_weight') + conv_node = onnx.helper.make_node('Conv', ['input', 'conv1_weight'], ['conv_output'], name='conv_node') + + identity_out = helper.make_tensor_value_info('identity_out', TensorProto.FLOAT, pad_input_shape) + identity_node = helper.make_node('Identity', ['conv_output'], ['identity_out'], name='IdentityNode') + + pad_dims_initializer = helper.make_tensor('pad_dims', TensorProto.INT64, [2 * rank], pad_dims) + output_shape = [sum(e) for e in list(zip(pad_input_shape, pad_dims[:rank], pad_dims[rank:]))] + output_tensor = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + pad_inputs = ['conv_output', 'pad_dims'] + initializers = [conv_weight_initializer, pad_dims_initializer] + if (constant_value is not None) and (pad_mode is None or pad_mode == 'constant'): + constant_value_tensor = helper.make_tensor('padding_value', TensorProto.FLOAT, [], [constant_value]) + pad_inputs.extend(['padding_value']) + initializers.extend([constant_value_tensor]) + kwargs = {'mode': pad_mode} if pad_mode is not None else {} + pad_node = helper.make_node('Pad', pad_inputs, ['output'], name='pad_node', **kwargs) + + graph = helper.make_graph([conv_node, identity_node, pad_node], 'TestOpQuantizerPad_test_model', + [input_tensor], [identity_out, output_tensor], initializer=initializers) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + model.ir_version = onnx.IR_VERSION + onnx.save(model, output_model_path) + + def quantize_mode(self, model_fp32_path, model_i8_path, data_reader=None): + if data_reader is not None: + quantize_static(model_fp32_path, model_i8_path, data_reader, reduce_range=True) + else: + quantize_dynamic(model_fp32_path, model_i8_path, reduce_range=True) + + def verify_should_not_trigger(self, quantize_mode='static'): + np.random.seed(108) + model_fp32_path = 'qop_pad_notrigger_fp32_{}.onnx'.format(quantize_mode) + model_i8_path = 'qop_pad_notrigger_i8_{}.onnx'.format(quantize_mode) + data_reader = self.input_feeds(1, {'input': [1, 16, 31, 31]}) + self.construct_model_pad(model_fp32_path, 'constant', [1, 16, 31, 31], [0, 0, 1, 2, 0, 0, 3, 4]) + self.quantize_mode(model_fp32_path, model_i8_path, None if quantize_mode != 'static' else data_reader) + data_reader.rewind() + # DequantizeLinear=0 pad node is not been quantized as input is not quantized. + check_op_type_count(self, model_i8_path, DynamicQuantizeLinear=0, QuantizeLinear=0, DequantizeLinear=0) + check_model_correctness(self, model_fp32_path, model_i8_path, data_reader.get_next()) + + def test_static_quantize_no_trigger(self): + self.verify_should_not_trigger(quantize_mode='static') + + def test_dynamic_quantize_no_trigger(self): + self.verify_should_not_trigger(quantize_mode='dynamic') + + def verify_quantize_with_pad_mode(self, pad_mode, constant_value=None, quantize_mode='static'): + np.random.seed(108) + tag_pad_mode = pad_mode if pad_mode is not None else 'none' + tag_constant_value = '' if constant_value is None else '_value' + model_fp32_path = 'qop_pad_{}_fp32_{}{}.onnx'.format(quantize_mode, tag_pad_mode, tag_constant_value) + model_i8_path = 'qop_pad_{}_i8_{}{}.onnx'.format(quantize_mode, tag_pad_mode, tag_constant_value) + data_reader = self.input_feeds(1, {'input': [1, 8, 33, 33]}) + self.construct_model_conv_pad(model_fp32_path, [1, 8, 33, 33], [16, 8, 3, 3], [1, 16, 31, 31], + pad_mode, [0, 0, 1, 2, 0, 0, 3, 4], constant_value=constant_value) + self.quantize_mode(model_fp32_path, model_i8_path, None if quantize_mode != 'static' else data_reader) + data_reader.rewind() + # DequantizeLinear=2 means there are one DequantizeLinear Node aftr both conv and pad, + # which means pad node is running in quantized semantic. + # In dynamic quantize mode, pad operator in fact not quantized as input is fp32. + kwargs = {'DynamicQuantizeLinear': 1} if quantize_mode != 'static' else {'DequantizeLinear': 2, 'QuantizeLinear': 1} + check_op_type_count(self, model_i8_path, **kwargs) + check_model_correctness(self, model_fp32_path, model_i8_path, data_reader.get_next()) + + def test_static_mode_edge(self): + self.verify_quantize_with_pad_mode('edge', constant_value=None) + + def test_static_mode_reflect(self): + self.verify_quantize_with_pad_mode('reflect', constant_value=None) + + def test_static_mode_constant_default(self): + self.verify_quantize_with_pad_mode('constant', constant_value=None) + + def test_static_mode_constant_value(self): + self.verify_quantize_with_pad_mode('constant', constant_value=3.75) + + def test_dynamic_mode_edge(self): + self.verify_quantize_with_pad_mode('edge', constant_value=None, quantize_mode='dynamic') + + def test_dynamic_mode_reflect(self): + self.verify_quantize_with_pad_mode('reflect', constant_value=None, quantize_mode='dynamic') + + def test_dynamic_mode_constant_default(self): + self.verify_quantize_with_pad_mode('constant', constant_value=None, quantize_mode='dynamic') + + def test_dynamic_mode_constant_value(self): + self.verify_quantize_with_pad_mode('constant', constant_value=3.75, quantize_mode='dynamic') + + +if __name__ == '__main__': + unittest.main()