diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/convolution_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/convolution_converter.py
index f32b5a65cac..645274c7870 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/convolution_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/convolution_converter.py
@@ -3,8 +3,6 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

-import logging
-
 import numpy as np
 import torch

@@ -32,17 +30,20 @@
 from executorch.backends.nxp.backend.ir.converter.node_converters.shared.conv_utils import (
     ConvConversionResult,
     ConvParameters,
+    get_node_tensor_params,
 )
 from executorch.backends.nxp.backend.ir.converter.quantization_utils import (
     set_quantization_parameters_to_tensor,
 )
 from executorch.backends.nxp.backend.ir.converter.tensor_utils import tensor_has_data
 from executorch.backends.nxp.backend.ir.lib.tflite.TensorType import TensorType
+from executorch.backends.nxp.backend.ir.tensor_formatting import TensorFormat
 from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model
 from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import (
     conv_2d_options,
     depthwise_conv_2d_options,
     reshape_options,
+    transpose_conv_options,
 )
 from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
 from torch.fx import Node
@@ -57,18 +58,53 @@ def _is_supported_on_target(
         parameters_mapping: dict[str, Parameter],
         custom_delegation_options: CustomDelegationOptions,
     ) -> bool:
-        activations = node.args[0]
+        num_macs = neutron_target_spec.get_num_macs()
+        node_t_params = get_node_tensor_params(node)
         weights = node.args[1]
-        groups = node.args[8]
+        conv_params = ConvParameters(
+            *ConvolutionConverter._get_convolution_arguments(node)
+        )

-        if activations.meta["val"].shape[0] != 1:
+        if node_t_params["batch_size"] != 1:
             # Only batch size 1 is supported on neutron.
             return False

-        if groups == 1:  # Regular convolution.
+        if conv_params.transposed:
+            # TransposeConv1d is not supported on Neutron
+            if len(conv_params.dilation) == 1:
+                return False
+            if not node_is_effectively_static_tensor(weights, parameters_mapping):
+                # Only supported if the weights are static, because TFLite `TransposeConv` uses permuted
+                # weights. In case the weights are dynamic, a Transpose operator would have to be added, which
+                # is not supported on Neutron.
+                return False
+            # neutron-library/src/utils/NeutronLibraryInterrogation.cpp#876 TransposeConv2DKernelKind
+            if (
+                conv_params.dilation != [1, 1]
+                or conv_params.padding[0] != 0
+                or conv_params.padding[1] >= node_t_params["kernel_width"]
+                or (
+                    conv_params.padding[1] != 0 and node_t_params["inp_height"] != 1
+                )  # Slice added by explicit padding
+                or conv_params.stride[0] != 1
+                or (
+                    (
+                        conv_params.stride[1] != node_t_params["kernel_width"] / 2
+                        or node_t_params["out_height"] != 1
+                    )
+                    and conv_params.stride[1] != node_t_params["kernel_width"]
+                )
+                or conv_params.stride[1] % 2 != 0
+                or node_t_params["inp_channels"] % num_macs != 0
+                or node_t_params["out_channels"] % num_macs != 0
+                or node_t_params["kernel_width"] % 2 != 0
+                or node_t_params["kernel_height"] != 1
+            ):
+                return False
+        elif conv_params.groups == 1:  # Regular convolution.
             pass
         elif conv_utils.group_conv_convertible_as_depthwise(
-            node, groups
+            node, conv_params.groups
         ):  # Depthwise convolution.
             # Only supported if the weights are static, because TFLite `DepthwiseConv2D` uses permuted
             # weights. In case the weights are dynamic, a Transpose operator would have to be added, which
@@ -76,10 +112,9 @@ def _is_supported_on_target(
             if not node_is_effectively_static_tensor(weights, parameters_mapping):
                 return False
         elif conv_utils.group_conv_convertible_into_multiple_convolutions(
-            node, groups
-        ):  # Separable conv. This should never be reached, as the node should have been decomposed into
-            # multiple parallel convolutions by the `SplitGroupConvolution` pre-processing pass.
-            logging.warning("Group convolution was not decomposed.")
+            node, conv_params.groups
+        ):  # Separable conv.
+            # Requires addition of `Split` and `Concatenation` operators, which are not supported on Neutron.
             return False
         else:  # Unexpected case (should never happen).
             return False
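The new `conv_params.transposed` branch above packs the whole Neutron `TransposeConv2DKernelKind` constraint into one boolean expression. Restated as a standalone predicate it is easier to review. This is only a sketch: the function name is local to this note, and `num_macs = 8` is an assumed value (the real one comes from `neutron_target_spec.get_num_macs()`).

```python
# A readable paraphrase of the TransposeConv2D delegation checks above.
def neutron_supports_transpose_conv_2d(
    stride: list[int],    # [stride_h, stride_w]
    padding: list[int],   # [pad_h, pad_w]
    dilation: list[int],  # [dil_h, dil_w]
    kernel: list[int],    # [kernel_h, kernel_w]
    inp_height: int,
    out_height: int,
    inp_channels: int,
    out_channels: int,
    num_macs: int,
) -> bool:
    kernel_h, kernel_w = kernel
    return not (
        dilation != [1, 1]
        or padding[0] != 0
        or padding[1] >= kernel_w
        or (padding[1] != 0 and inp_height != 1)  # Would need an extra Slice.
        or stride[0] != 1
        # Stride width must equal the kernel width, or half of it (the latter
        # only when the output height is 1).
        or ((stride[1] != kernel_w / 2 or out_height != 1) and stride[1] != kernel_w)
        or stride[1] % 2 != 0
        or inp_channels % num_macs != 0
        or out_channels % num_macs != 0
        or kernel_w % 2 != 0
        or kernel_h != 1
    )

# First delegated test case from the test file at the bottom of this diff:
# ConvTranspose2d(8, 16, (1, 4), stride=(1, 2)) on a (1, 8, 1, 16) input.
assert neutron_supports_transpose_conv_2d(
    stride=[1, 2], padding=[0, 0], dilation=[1, 1], kernel=[1, 4],
    inp_height=1, out_height=1, inp_channels=8, out_channels=16, num_macs=8,
)
```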
@@ -96,11 +131,15 @@ def _is_supported_in_IR(
         dimensions = input_tensor_rank - 2
         is_transposed = node.args[6]
         output_padding = node.args[7]
+        groups = node.args[8]

-        if is_transposed:
+        if is_transposed and conv_utils.group_conv_convertible_as_depthwise(
+            node, groups
+        ):
+            # TFLite does not support transposed depthwise convolution
             return False

-        if output_padding != [0] * dimensions:
+        if not is_transposed and output_padding != [0] * dimensions:
             return False

         if input_tensor_safe(node, 2) is None:
@@ -115,6 +154,20 @@
     Transposed = bool
     Groups = int

+    def _compute_slicing_params(
+        self, output_shape, explicit_padding
+    ) -> tuple[list[int], list[int]]:
+        begins = []
+        sizes = []
+
+        for axis in range(len(output_shape)):
+            (start, end) = explicit_padding[axis]
+
+            begins.append(start)
+            sizes.append(output_shape[axis] - start - end)
+
+        return begins, sizes
+
     @staticmethod
     def _get_convolution_arguments(
         conv_node: Node,
@@ -130,7 +183,7 @@ def _get_convolution_arguments(
             list(padding),
             list(dilation),
             transposed,
-            out_padding,
+            list(out_padding),
             groups,
         )
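`_compute_slicing_params` computes the `begin`/`size` arguments of the `Slice` operator that later trims explicit padding off the `TransposeConv` output. A self-contained rerun of its arithmetic on a concrete shape:

```python
# Mirror of `_compute_slicing_params`: for each axis, the slice starts where
# the left padding ends and keeps everything except both padding amounts.
def compute_slicing_params(output_shape, explicit_padding):
    begins, sizes = [], []
    for axis in range(len(output_shape)):
        start, end = explicit_padding[axis]
        begins.append(start)
        sizes.append(output_shape[axis] - start - end)
    return begins, sizes

# A padded NHWC output of shape (1, 1, 36, 16) with one column of explicit
# padding on each side of the width axis:
begins, sizes = compute_slicing_params(
    [1, 1, 36, 16], [(0, 0), (0, 0), (1, 1), (0, 0)]
)
assert begins == [0, 0, 1, 0]
assert sizes == [1, 1, 34, 16]
```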
@@ -259,15 +312,16 @@ def _convert_unpadded_2D(
                 [output_channels], "zero_bias", bias_type, False
             )

-        # Compute scale and zero point for bias tensor
-        input_scale = np.array(x.quantization.scale.vector)
-        weight_scale = np.array(w.quantization.scale.vector)
-        bias_scale = input_scale * weight_scale
-        bias_zero_point = np.zeros(weight_scale.shape, dtype=np.int64)
+        if w.type in [TensorType.INT8, TensorType.UINT8]:
+            # Compute scale and zero point for bias tensor
+            input_scale = np.array(x.quantization.scale.vector)
+            weight_scale = np.array(w.quantization.scale.vector)
+            bias_scale = input_scale * weight_scale
+            bias_zero_point = np.zeros(weight_scale.shape, dtype=np.int64)

-        set_quantization_parameters_to_tensor(
-            b, bias_scale, bias_zero_point, quantized_dimension=0
-        )
+            set_quantization_parameters_to_tensor(
+                b, bias_scale, bias_zero_point, quantized_dimension=0
+            )

         # Assign the operator its TFLite inputs and outputs
         t_op.tmp_inputs = [x, w, b]
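The new `w.type` guard matters because the synthetic zero bias only needs quantization parameters when the weights are quantized. The scale arithmetic itself is the standard one: an int32 bias lives on the accumulator scale `input_scale * weight_scale`. A minimal numeric illustration (all values invented):

```python
import numpy as np

input_scale = np.array([0.02])               # Per-tensor input scale.
weight_scale = np.array([0.5, 0.25, 0.125])  # Per-channel weight scales.

bias_scale = input_scale * weight_scale      # Broadcasts to one scale per channel.
bias_zero_point = np.zeros(weight_scale.shape, dtype=np.int64)

assert bias_scale.tolist() == [0.01, 0.005, 0.0025]
# real_bias = bias_scale * (int32_bias - 0), so an int32 bias of 400 in
# channel 0 represents 400 * 0.01 = 4.0 in float.
```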
@@ -278,87 +332,195 @@ def _convert_unpadded_2D(

         return conversion_result

-    def _convert_2d_conv(
+    def _convert_transpose_conv(
         self, t_op: tflite_model.Operator, conv_params: ConvParameters
-    ) -> list[tflite_model.Operator]:
-        if conv_utils.group_conv_convertible_as_depthwise(
-            t_op, conv_params.groups
-        ):  # Convert to `DepthwiseConv2D`.
-            t_op.builtin_options = depthwise_conv_2d_options.DepthwiseConv2D()
-
-            conversion_result = self._convert_unpadded_2D(t_op, conv_params)
-            t_op.builtin_options.padding, explicit_padding = (
-                aten_translator.convert_padding(conv_params.padding)
-            )
-            if explicit_padding is not None:
-                # Need to prepend a 'Pad' operator, which adds 0s (or `zero_point` for the quantized case).
-                input_quantization = t_op.tmp_inputs[0].quantization
-                pad_value = (
-                    None
-                    if input_quantization is None
-                    else np.array(input_quantization.zero_point[0]).astype(
-                        tf_lite_type_to_numpy(t_op.tmp_inputs[0].type)
-                    )
-                )
-                conversion_result.ops_list.add_pre(
-                    self.builder.create_pad_operator_before(
-                        t_op, 0, explicit_padding, constant_value=pad_value
-                    )
-                )
-
-            # DepthwiseConv2D expects weights in format [kernel_channels, kernel_height, kernel_width, output_channels]
-            perm = [3, 1, 2, 0]
-            weight_tensor = conversion_result.conv_weight_tensor
-            if tensor_has_data(weight_tensor):
-                # Transpose cloned tensor statically
-                t_op.tmp_inputs[1] = self.builder.create_transposed_tensor(
-                    weight_tensor, perm
-                )
-
-                if t_op.tmp_inputs[1].quantization is not None:
-                    # Model is quantized
-                    t_op.tmp_inputs[1].quantization.quantized_dimension = 3
-            else:
-                raise NotImplementedError("Dynamic Depthwise Conv weights.")
-
-        elif conv_utils.group_conv_convertible_into_multiple_convolutions(
-            t_op, conv_params.groups
-        ):
-            # This case should have been rejected in the `is_supported_on_target()` method.
-            raise RuntimeError("Group convolution was not decomposed.")
-
-        else:
-            # Convert to regular `Conv2D`.
-            t_op.builtin_options = conv_2d_options.Conv2D()
-            conversion_result = self._convert_unpadded_2D(t_op, conv_params)
-            t_op.builtin_options.padding, explicit_padding = (
-                aten_translator.convert_padding(conv_params.padding)
-            )
-            if explicit_padding is not None:
-                # Need to prepend a 'Pad' operator, which adds 0s (or `zero_point` for the quantized case).
-                input_quantization = t_op.tmp_inputs[0].quantization
-                pad_value = (
-                    None
-                    if input_quantization is None
-                    else np.array(input_quantization.zero_point[0]).astype(
-                        tf_lite_type_to_numpy(t_op.tmp_inputs[0].type)
-                    )
-                )
-                conversion_result.ops_list.add_pre(
-                    self.builder.create_pad_operator_before(
-                        t_op, 0, explicit_padding, constant_value=pad_value
-                    )
-                )
+    ) -> conv_utils.ConvConversionResult:
+        """Convert the `aten.convolution` into TFLite TransposeConv. The `builtin_options` must be
+        converted by the caller.
+        """
+        common.assign_2d_strides(t_op.builtin_options, conv_params.stride)
+
+        x: tflite_model.Tensor = t_op.tmp_inputs[0]
+        w: tflite_model.Tensor = t_op.tmp_inputs[1]
+        y: tflite_model.Tensor = t_op.tmp_outputs[0]
+
+        if (b := try_get_input(t_op, 2)) is None:
+            # Operator has no bias. Convolution aten op can omit it, TFLite can't.
+            # Weight tensor format in TFLite: [C, kH, kW, O]
+            # (C = input channels, O = output channels, kW = kernel width, kH = kernel height)
+            output_channels = w.shape.vector[-1]
+
+            if w.type == TensorType.FLOAT32:
+                bias_type = np.dtype(np.float32)
+            elif w.type in [TensorType.INT8, TensorType.UINT8]:
+                bias_type = np.dtype(np.int32)
+            else:
+                # Should never happen.
+                raise NotImplementedError(
+                    f"Convolution node with unsupported weight type: {w.type}"
+                )
+
+            b = self.builder.create_zeros_tensor(
+                [output_channels], "zero_bias", bias_type, True
+            )
+
+        if w.type in [TensorType.INT8, TensorType.UINT8]:
+            # Compute scale and zero point for bias tensor
+            input_scale = np.array(x.quantization.scale.vector)
+            weight_scale = np.array(w.quantization.scale.vector)
+            bias_scale = input_scale * weight_scale
+            bias_zero_point = np.zeros(weight_scale.shape, dtype=np.int64)
+
+            set_quantization_parameters_to_tensor(
+                b, bias_scale, bias_zero_point, quantized_dimension=0
+            )
+
+        # TransposeConv weight tensor format in TFLite: [O, kH, kW, C]
+        # (C = input channels, O = output channels, kW = kernel width, kH = kernel height)
+        if tensor_has_data(w):
+            # Transpose cloned tensor statically
+            w = self.builder.create_transposed_tensor(w, [3, 1, 2, 0])
+
+            if w.quantization is not None:
+                # Model is quantized
+                w.quantization.quantized_dimension = 0
+        else:
+            raise NotImplementedError("Dynamic Transpose Conv weights.")
+        w.tensor_format = TensorFormat.TRANSPOSE_CONV_2D_WEIGHT_FORMAT
+
+        output_shape_tensor_data = np.asarray(y.shape.vector, dtype=np.int32)
+        o = self.builder.create_tensor_for_data(
+            output_shape_tensor_data, "output_shape"
+        )
+
+        # Assign the operator its TFLite inputs and outputs
+        t_op.tmp_inputs = [o, w, x, b]
+        t_op.tmp_outputs = [y]
+        conversion_result = ConvConversionResult(x, w, b, y, o)
+        t_op.builtin_options.padding, explicit_padding = (
+            aten_translator.convert_padding(conv_params.padding)
+        )
+        if explicit_padding is not None:
+            # Add padding to output shape to make sure we have computed all the data we need
+            for idx, padding in enumerate(explicit_padding):
+                output_shape_tensor_data[idx] += padding[0] + padding[1]
+            y.shape = tflite_model.Shape(output_shape_tensor_data.tolist())

+            # We need to "cut" produced tensor by size of explicit padding
+            begins, sizes = self._compute_slicing_params(
+                output_shape_tensor_data.tolist(), explicit_padding
+            )
+            slice_op = self.builder.create_slice_after(t_op, 0, begins, sizes)
+            conversion_result.ops_list.add_post(slice_op)
+
+        conversion_result.ops_list.middle_op = t_op
+
+        return conversion_result
+
+    def _convert_2d_conv(
+        self, t_op: tflite_model.Operator, conv_params: ConvParameters
+    ) -> list[tflite_model.Operator]:
+        if conv_params.transposed:
+            t_op.builtin_options = transpose_conv_options.TransposeConv()
+            if conv_utils.group_conv_convertible_into_multiple_convolutions(
+                t_op, conv_params.groups
+            ):
+                # Convert to separated `TransposeConv`.
+                raise NotImplementedError("Separated TransposeConv not implemented.")
+            else:
+                # Convert to `TransposeConv`.
+                conversion_result = self._convert_transpose_conv(t_op, conv_params)
+
+        else:
+            if conv_utils.group_conv_convertible_as_depthwise(
+                t_op, conv_params.groups
+            ):  # Convert to `DepthwiseConv2D`.
+                t_op.builtin_options = depthwise_conv_2d_options.DepthwiseConv2D()
+
+                conversion_result = self._convert_unpadded_2D(t_op, conv_params)
+                t_op.builtin_options.padding, explicit_padding = (
+                    aten_translator.convert_padding(conv_params.padding)
+                )
+                if explicit_padding is not None:
+                    # Need to prepend a 'Pad' operator, which adds 0s (or `zero_point` for the quantized case).
+                    input_quantization = t_op.tmp_inputs[0].quantization
+                    pad_value = (
+                        None
+                        if input_quantization is None
+                        else np.array(input_quantization.zero_point[0]).astype(
+                            tf_lite_type_to_numpy(t_op.tmp_inputs[0].type)
+                        )
+                    )
+                    conversion_result.ops_list.add_pre(
+                        self.builder.create_pad_operator_before(
+                            t_op, 0, explicit_padding, constant_value=pad_value
+                        )
+                    )
+
+                # DepthwiseConv2D expects weights in format [kernel_channels, kernel_height, kernel_width, output_channels]
+                perm = [3, 1, 2, 0]
+                weight_tensor = conversion_result.conv_weight_tensor
+                if tensor_has_data(weight_tensor):
+                    # Transpose cloned tensor statically
+                    t_op.tmp_inputs[1] = self.builder.create_transposed_tensor(
+                        weight_tensor, perm
+                    )
+
+                    if t_op.tmp_inputs[1].quantization is not None:
+                        # Model is quantized
+                        t_op.tmp_inputs[1].quantization.quantized_dimension = 3
+                else:
+                    raise NotImplementedError("Dynamic Depthwise Conv weights.")
+
+            elif conv_utils.group_conv_convertible_into_multiple_convolutions(
+                t_op, conv_params.groups
+            ):  # Convert to separated `Conv2D`.
+                t_op.builtin_options = conv_2d_options.Conv2D()
+
+                return conv_utils.create_separated_convolutions_based_on_group(
+                    t_op,
+                    conv_params,
+                    self.builder,
+                    self._convert_unpadded_2D,
+                    conv_utils.conv_op_factory,
+                )
+
+            else:
+                # Convert to regular `Conv2D`.
+                t_op.builtin_options = conv_2d_options.Conv2D()
+                conversion_result = self._convert_unpadded_2D(t_op, conv_params)
+                t_op.builtin_options.padding, explicit_padding = (
+                    aten_translator.convert_padding(conv_params.padding)
+                )
+                if explicit_padding is not None:
+                    # Need to prepend a 'Pad' operator, which adds 0s (or `zero_point` for the quantized case).
+                    input_quantization = t_op.tmp_inputs[0].quantization
+                    pad_value = (
+                        None
+                        if input_quantization is None
+                        else np.array(input_quantization.zero_point[0]).astype(
+                            tf_lite_type_to_numpy(t_op.tmp_inputs[0].type)
+                        )
+                    )
+                    conversion_result.ops_list.add_pre(
+                        self.builder.create_pad_operator_before(
+                            t_op, 0, explicit_padding, constant_value=pad_value
+                        )
+                    )

         return conversion_result.ops_list.flatten()

     def convert(self, node: Node):
         self.assert_convertible(node)

-        stride, padding, dilation, _, _, groups = self._get_convolution_arguments(node)
+        stride, padding, dilation, transposed, out_padding, groups = (
+            self._get_convolution_arguments(node)
+        )

         t_op = self._create_tflite_op_with_io_tensors(node)
-        conv_params = ConvParameters(stride, padding, dilation, groups)
+        conv_params = ConvParameters(
+            stride, padding, dilation, transposed, out_padding, groups
+        )

         rank = t_op.tmp_inputs[1].shape.len()
         if rank == 3:  # Conv1D
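`_convert_transpose_conv` permutes static weights from the channels-last aten layout `[C, kH, kW, O]` to the TFLite `TransposeConv` layout `[O, kH, kW, C]`, and moves the per-channel quantized dimension to axis 0 accordingly. The permutation `[3, 1, 2, 0]` in isolation:

```python
import numpy as np

c_in, k_h, k_w, c_out = 8, 1, 4, 16
w = np.arange(c_in * k_h * k_w * c_out).reshape(c_in, k_h, k_w, c_out)

w_tflite = np.transpose(w, [3, 1, 2, 0])
assert w_tflite.shape == (c_out, k_h, k_w, c_in)

# The output-channel axis moved from position 3 to position 0, which is why
# the quantized_dimension is set to 0 after the static transpose.
assert w_tflite[5, 0, 2, 3] == w[3, 0, 2, 5]
```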
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/qdq_dequantize_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/qdq_dequantize_converter.py
index 1d7c6b44627..3e20e504e8a 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/qdq_dequantize_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/qdq_dequantize_converter.py
@@ -2,11 +2,13 @@
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+
 from abc import ABC, abstractmethod

 import numpy as np

 from executorch.backends.nxp.backend.ir.converter.conversion.translator import (
+    create_channels_last_to_channels_first_permutation,
     torch_type_to_numpy_type,
 )
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
@@ -16,6 +18,8 @@
 from executorch.backends.nxp.backend.ir.converter.quantization_utils import (
     set_quantization_parameters_to_tensor,
 )
+from executorch.backends.nxp.backend.ir.tensor_formatting import TensorFormat
+from executorch.backends.nxp.backend.ir.tflite_generator.tflite_model import Tensor
 from torch.fx import Node
 from torch.nn import Parameter

@@ -50,6 +54,9 @@ def convert(self, node: Node):
         scale = self.get_scale(node)
         zero_point = self.get_zero_point(node)
+        quantized_dimension = 0
+        if isinstance(self, QDQPerChannelDequantizeConverter):
+            quantized_dimension = self.get_quantization_dimension(from_tensor, node)

         if self.context.parameters_mapping.get(node.args[0].name, None) is None:
             # Convert dequantize as identity op (Transpose that will be removed) because
@@ -57,15 +64,21 @@ def convert(self, node: Node):
             # here we will change input name of the model.
             t_op = self._create_tflite_op_with_io_tensors(node)

-            set_quantization_parameters_to_tensor(to_tensor, scale, zero_point, 0)
-            set_quantization_parameters_to_tensor(from_tensor, scale, zero_point, 0)
+            set_quantization_parameters_to_tensor(
+                to_tensor, scale, zero_point, quantized_dimension
+            )
+            set_quantization_parameters_to_tensor(
+                from_tensor, scale, zero_point, quantized_dimension
+            )

             from_tensor.type = to_tensor.type
             self.builder.turn_operator_to_identity(t_op)
             self.builder.append_operators([t_op])
         else:
             # Dequantize consumes tensor with static data -> convert as a tensor
-            set_quantization_parameters_to_tensor(to_tensor, scale, zero_point, 0)
+            set_quantization_parameters_to_tensor(
+                to_tensor, scale, zero_point, quantized_dimension
+            )

             # Change type so we pass check tensor similarity check when redirecting
             from_tensor.type = to_tensor.type
@@ -89,3 +102,15 @@ def get_zero_point(self, node: Node) -> np.ndarray:

     def get_scale(self, node: Node) -> np.ndarray:
         return self.context.parameters_mapping[node.args[1].name].numpy()
+
+    def get_quantization_dimension(self, from_tensor: Tensor, node: Node) -> int:
+        quantization_dimension = node.args[3]
+
+        # Quantization dimension is affected by tensor format
+        if from_tensor.tensor_format == TensorFormat.CHANNELS_LAST:
+            tensor_rank = len(from_tensor.shape.vector)
+            perm = create_channels_last_to_channels_first_permutation(
+                tensor_rank, return_list=True
+            )
+            quantization_dimension = perm[quantization_dimension]
+        return quantization_dimension
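`get_quantization_dimension` remaps the per-channel axis recorded by the aten dequantize (channels-first coordinates) onto the channels-last tensor kept in the TFLite model. A sketch of the rank-4 remapping; the helper below is local to this note and only mirrors what the imported `create_channels_last_to_channels_first_permutation` is assumed to return:

```python
def channels_last_to_channels_first_perm(rank: int) -> list[int]:
    # For rank 4 this is NHWC -> NCHW, i.e. [0, 3, 1, 2]: channels-first
    # axis i corresponds to channels-last axis perm[i].
    return [0, rank - 1] + list(range(1, rank - 1))

perm = channels_last_to_channels_first_perm(4)
assert perm == [0, 3, 1, 2]

pt_dim = 1               # Dequantize along channels in NCHW.
tflite_axis = perm[pt_dim]
assert tflite_axis == 3  # Channels sit last in the NHWC tensor.
```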
diff --git a/backends/nxp/backend/ir/converter/node_converters/shared/conv_utils.py b/backends/nxp/backend/ir/converter/node_converters/shared/conv_utils.py
index 5817fd127b3..2012ecc8640 100755
--- a/backends/nxp/backend/ir/converter/node_converters/shared/conv_utils.py
+++ b/backends/nxp/backend/ir/converter/node_converters/shared/conv_utils.py
@@ -16,6 +16,8 @@ class ConvParameters:
     stride: list[int]
     padding: list[int]
     dilation: list[int]
+    transposed: bool
+    out_padding: list[int]
     groups: int


@@ -35,6 +37,29 @@ def _get_IO_channels(node: Node | tflite_model.Operator) -> (int, int):
     return input_channels, output_channels


+def get_node_tensor_params(node: Node) -> dict:
+    node_tensor_params = {}
+
+    input_tensor = node.args[0]
+    assert len(input_tensor.meta["val"].shape) in [3, 4], "Supports only Conv 1D, 2D."
+
+    node_tensor_params["batch_size"] = input_tensor.meta["val"].shape[0]
+    node_tensor_params["inp_channels"] = input_tensor.meta["val"].shape[1]
+    node_tensor_params["inp_height"] = input_tensor.meta["val"].shape[2]
+    if len(input_tensor.meta["val"].shape) == 4:
+        node_tensor_params["inp_width"] = input_tensor.meta["val"].shape[3]
+
+    weights = node.args[1]
+    node_tensor_params["out_channels"] = node.meta["val"].shape[1]
+    node_tensor_params["out_height"] = node.meta["val"].shape[2]
+    if len(node.meta["val"].shape) == 4:
+        node_tensor_params["out_width"] = node.meta["val"].shape[3]
+    node_tensor_params["kernel_height"] = weights.meta["val"].shape[2]
+    if len(weights.meta["val"].shape) == 4:
+        node_tensor_params["kernel_width"] = weights.meta["val"].shape[3]
+
+    return node_tensor_params
+
+
 def group_conv_convertible_as_depthwise(node: Node | tflite_model.Operator, group: int):
     input_channels, output_channels = _get_IO_channels(node)

@@ -70,9 +95,11 @@ def __init__(
         weight_tensor: tflite_model.Tensor,
         bias_tensor: tflite_model.Tensor,
         output_tensor: tflite_model.Tensor,
+        output_shape_tensor: tflite_model.Tensor | None = None,
     ):
         self.conv_input_tensor = input_tensor
         self.conv_weight_tensor = weight_tensor
         self.conv_bias_tensor = bias_tensor
         self.conv_output_tensor = output_tensor
+        self.output_shape_tensor = output_shape_tensor
         self.ops_list = OpsList()
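`get_node_tensor_params` reads everything from FX node metadata. With plain tuples, the 2D transposed case looks like this; the shapes are taken from the first delegated test case at the bottom of this diff, where aten `ConvTranspose2d` weights are laid out as `[in_channels, out_channels, kH, kW]`:

```python
# ConvTranspose2d(8, 16, (1, 4), stride=(1, 2)) applied to a (1, 8, 1, 16) input.
input_shape = (1, 8, 1, 16)   # node.args[0].meta["val"].shape
weight_shape = (8, 16, 1, 4)  # node.args[1].meta["val"].shape
output_shape = (1, 16, 1, 34) # node.meta["val"].shape

node_tensor_params = {
    "batch_size": input_shape[0],
    "inp_channels": input_shape[1],
    "inp_height": input_shape[2],
    "inp_width": input_shape[3],
    "out_channels": output_shape[1],
    "out_height": output_shape[2],
    "out_width": output_shape[3],
    "kernel_height": weight_shape[2],
    "kernel_width": weight_shape[3],
}
assert node_tensor_params["kernel_width"] == 4
assert node_tensor_params["out_height"] == 1
```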
diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py
index 2681e221869..db4a7c67833 100644
--- a/backends/nxp/quantizer/neutron_quantizer.py
+++ b/backends/nxp/quantizer/neutron_quantizer.py
@@ -18,6 +18,7 @@
     CatPattern,
     Conv1dPattern,
     Conv2dPattern,
+    ConvTranspose2dPattern,
     DropoutPattern,
     FlattenPattern,
     HardTanhInPlacePattern,
@@ -194,6 +195,7 @@ def __init__(self):
             NeutronAtenQuantizer(CatPattern(), static_qconfig),
             NeutronAtenQuantizer(Conv1dPattern(), static_qconfig),
             NeutronAtenQuantizer(Conv2dPattern(), static_qconfig),
+            NeutronAtenQuantizer(ConvTranspose2dPattern(), static_qconfig),
             NeutronAtenQuantizer(DropoutPattern(), static_qconfig),
             NeutronAtenQuantizer(FlattenPattern(), static_qconfig),
             NeutronAtenQuantizer(HardTanhPattern(), static_qconfig),
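The registration above is all that is needed for `conv_transpose2d` to be picked up by the PT2E flow. A minimal usage sketch, assuming the standard `prepare_pt2e`/`convert_pt2e` entry points (the export call varies between torch versions, and the no-argument `NeutronQuantizer()` constructor follows the hunk above):

```python
import torch
from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

model = torch.nn.ConvTranspose2d(8, 16, (1, 4), stride=(1, 2)).eval()
example_inputs = (torch.randn(1, 8, 1, 16),)

# Export to an FX graph, attach observers, calibrate, then materialize Q/DQ ops.
graph = torch.export.export_for_training(model, example_inputs).module()
prepared = prepare_pt2e(graph, NeutronQuantizer())
prepared(*example_inputs)           # One calibration batch.
quantized = convert_pt2e(prepared)  # conv_transpose2d is now matched by the new pattern.
```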
""" + @abstractmethod def partition_types(self) -> list[torch.nn.Module]: pass @@ -353,11 +354,61 @@ def partition_types(self) -> list[OpOverload]: return [torch.ops.aten.conv1d.default] +class ConvTranspose1dPattern(ConvPattern): + def partition_types(self) -> list[OpOverload]: + return [torch.ops.aten.conv_transpose1d.default] + + class Conv2dPattern(ConvPattern): def partition_types(self) -> list[OpOverload]: return [torch.ops.aten.conv2d.default] +class ConvTranspose2dPattern(QuantizationPattern): + def partition_types(self) -> list[OpOverload]: + return [torch.ops.aten.conv_transpose2d.input] + + def get_anchors( + self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] + ) -> PartitionAnchors: + conv_node = fused_partition[0].nodes[-1] + + bias_quantization_qspec = DerivedQuantizationSpec( + derived_from=[ + (conv_node.args[0], conv_node), + (conv_node.args[1], conv_node), + ], + derive_qparams_fn=get_bias_qparams, + dtype=torch.int32, + quant_min=-(2**31) + 1, + quant_max=2**31 - 1, + qscheme=torch.per_channel_symmetric, + ch_axis=0, + ) + + weight_observer_or_fake_quant_ctr = PerChannelMinMaxObserver + weight_quantization_spec = QuantizationSpec( + dtype=torch.int8, + observer_or_fake_quant_ctr=weight_observer_or_fake_quant_ctr, + quant_min=-127, + quant_max=127, + qscheme=torch.per_channel_symmetric, + ch_axis=1, + ) + + # Keep bias empty if not supplied + bias = [] + if len(conv_node.args) > 2 and conv_node.args[2] is not None: + bias = [(conv_node, NodeArgsIdx(2), bias_quantization_qspec)] + + return PartitionAnchors( + inputs=[(conv_node, NodeArgsIdx(0))], + weights=[(conv_node, NodeArgsIdx(1), weight_quantization_spec)], + biases=bias, + output=[(conv_node,)], + ) + + class DropoutPattern(SharedSpecPattern): """ Quantizer for Dropout operator. diff --git a/backends/nxp/tests/ir/converter/node_converter/test_conv_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_conv_converter.py index d7a59cad6d6..ca4a12146fe 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_conv_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_conv_converter.py @@ -22,10 +22,12 @@ ) from executorch.backends.nxp.tests.executors import ( convert_run_compare, + graph_contains_any_of_ops, ToChannelFirstPreprocess, ToChannelLastPreprocess, ) from executorch.backends.nxp.tests.models import Conv1dModule, Conv2dModule +from executorch.exir.dialects._ops import ops as exir_ops from torch.export import ExportedProgram @@ -35,12 +37,15 @@ def reseed_model_per_test_run(): np.random.seed(23) +@pytest.mark.parametrize("bias", [False, True]) @pytest.mark.parametrize("stride", [1, 2]) @pytest.mark.parametrize("dilation", [2, 1]) @pytest.mark.parametrize("kernel_size", [(1,), (3,)]) -def test_conv1d_quant_conversion(stride, dilation, kernel_size, mocker): +def test_conv1d_quant_conversion(bias, stride, dilation, kernel_size, mocker): input_shape = (1, 4, 16) - model = Conv1dModule(stride=stride, dilation=dilation, kernel_size=kernel_size) + model = Conv1dModule( + bias=bias, stride=stride, dilation=dilation, kernel_size=kernel_size + ) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") ops_spy = mocker.spy(ModelBuilder, "finish") @@ -142,13 +147,17 @@ def test_conv1d_quant_conversion__padded( ) # `Conv` input zp. 
+@pytest.mark.parametrize("bias", [False, True]) @pytest.mark.parametrize("stride", [1, 2]) @pytest.mark.parametrize("dilation", [2, 1]) @pytest.mark.parametrize("kernel_size", [(1,), (3,)]) -def test_conv1d_quant_conversion__depthwise(stride, dilation, kernel_size, mocker): +def test_conv1d_quant_conversion__depthwise( + bias, stride, dilation, kernel_size, mocker +): input_shape = (1, 4, 16) group = input_shape[1] model = Conv1dModule( + bias=bias, group=group, in_channels=group, out_channels=group, @@ -369,6 +378,26 @@ def test_conv1d_quant_conversion__depthwise__padded( (1, 32, 32, 32), id="In ch 32, out ch 32, kernel 4, padding (0, 2), dilation (1, 2)", ), + pytest.param( + Conv2dModule( + in_channels=8, out_channels=32, kernel_size=5, padding=3, bias=False + ), + (1, 8, 32, 32), + id="In ch 8, out ch 32, kernel 5, padding 3, no bias", + ), + pytest.param( + Conv2dModule( + in_channels=32, + out_channels=32, + kernel_size=3, + padding=(1, 0), + dilation=(3, 1), + bias=False, + ), + (1, 32, 35, 35), + id="In ch 32, out ch 32, kernel 3, padding (1, 0), dilation (3, 1)," + "no bias", + ), ], ) def test_conv2d_quant_conversion(mocker, model: torch.nn.Module, input_shape): @@ -395,47 +424,12 @@ def test_conv2d_quant_conversion(mocker, model: torch.nn.Module, input_shape): ) -@pytest.mark.parametrize("stride", [1, 2]) -@pytest.mark.parametrize("dilation", [1, 2]) -@pytest.mark.parametrize("kernel_shape", [[1, 2], [3, 3], [4, 1]]) -def test_conv2d_conversion__depthwise(stride, dilation, kernel_shape, mocker): - input_shape = (1, 3, 12, 16) - group = input_shape[1] - edge_program = to_edge_program( - Conv2dModule( - group=group, - in_channels=group, - out_channels=group, - stride=stride, - dilation=dilation, - kernel_size=kernel_shape, - ), - input_shape, - ).exported_program() - - input_data = np.random.random(input_shape).astype(np.float32) - - spy = mocker.spy(ModelBuilder, "finish") - - convert_run_compare( - edge_program, - input_data, - tflite_input_preprocess=ToChannelLastPreprocess(), - tflite_output_preprocess=ToChannelFirstPreprocess(), - atol=4e-7, - ) - conversion_result = spy.spy_return - ops = conversion_result.sub_graphs[0].operators.vector - - assert len(ops) == 1 - assert ops[0].builtin_options.operator_type == BuiltinOperator.DEPTHWISE_CONV_2D - - +@pytest.mark.parametrize("bias", [False, True]) @pytest.mark.parametrize("stride", [1, 2]) @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.parametrize("kernel_shape", [[1, 2], [3, 3], [4, 1]]) def test_conv2d_conversion__depthwise__quantized( - stride, dilation, kernel_shape, mocker + bias, stride, dilation, kernel_shape, mocker ): input_shape = (1, 4, 12, 12) group = input_shape[1] @@ -443,6 +437,7 @@ def test_conv2d_conversion__depthwise__quantized( edge_program = to_quantized_edge_program( Conv2dModule( + bias=bias, group=group, in_channels=group, out_channels=group, @@ -517,3 +512,154 @@ def test_conv2d_conversion__depthwise__padded__quantized(padding, mocker): len(nodes) == 7 ) # input, Quant, lowered_module, delegate_call, getitem, Deq, output assert nodes[2].target == "lowered_module_0" + + +@pytest.mark.parametrize( + "model, input_shape", + [ + pytest.param( + torch.nn.ConvTranspose2d(8, 16, (1, 4), stride=(1, 2)), + (1, 8, 1, 16), + id="In ch 8, out ch 16, kernel (1, 4), stride (1, 2)", + ), + pytest.param( + torch.nn.ConvTranspose2d(64, 64, (1, 2), stride=(1, 2)), + (1, 64, 3, 12), + id="In ch 64, out ch 64, kernel (1, 2), stride (1, 2)", + ), + pytest.param( + torch.nn.ConvTranspose2d( + 16, 24, (1, 6), 
@@ -517,3 +512,154 @@ def test_conv2d_conversion__depthwise__padded__quantized(padding, mocker):
         len(nodes) == 7
     )  # input, Quant, lowered_module, delegate_call, getitem, Deq, output
     assert nodes[2].target == "lowered_module_0"
+
+
+@pytest.mark.parametrize(
+    "model, input_shape",
+    [
+        pytest.param(
+            torch.nn.ConvTranspose2d(8, 16, (1, 4), stride=(1, 2)),
+            (1, 8, 1, 16),
+            id="In ch 8, out ch 16, kernel (1, 4), stride (1, 2)",
+        ),
+        pytest.param(
+            torch.nn.ConvTranspose2d(64, 64, (1, 2), stride=(1, 2)),
+            (1, 64, 3, 12),
+            id="In ch 64, out ch 64, kernel (1, 2), stride (1, 2)",
+        ),
+        pytest.param(
+            torch.nn.ConvTranspose2d(
+                16, 24, (1, 6), stride=(1, 6), output_padding=(0, 3)
+            ),
+            (1, 16, 7, 15),
+            id="In ch 16, out ch 24, kernel (1, 6), stride (1, 6), output_padding (0, 3)",
+        ),
+        pytest.param(
+            torch.nn.ConvTranspose2d(16, 40, (1, 4), stride=(1, 4), padding=(0, 1)),
+            (1, 16, 1, 27),
+            id="In ch 16, out ch 40, kernel (1, 4), stride (1, 4), padding (0, 1)",
+        ),
+        pytest.param(
+            torch.nn.ConvTranspose2d(8, 16, (1, 4), stride=(1, 2), padding=(0, 1)),
+            (1, 8, 1, 16),
+            id="In ch 8, out ch 16, kernel (1, 4), stride (1, 2), padding (0, 1)",
+        ),
+        pytest.param(
+            torch.nn.ConvTranspose2d(
+                8, 16, (1, 8), stride=(1, 4), output_padding=(0, 2)
+            ),
+            (1, 8, 1, 16),
+            id="In ch 8, out ch 16, kernel (1, 8), stride (1, 4), output_padding (0, 2)",
+        ),
+        pytest.param(
+            torch.nn.ConvTranspose2d(16, 16, (1, 4), stride=(1, 2)),
+            (1, 16, 1, 16),
+            id="In ch 16, out ch 16, kernel (1, 4), stride (1, 2)",
+        ),
+        pytest.param(
+            torch.nn.ConvTranspose2d(8, 16, (1, 4), stride=(1, 2), bias=False),
+            (1, 8, 1, 16),
+            id="In ch 8, out ch 16, kernel (1, 4), stride (1, 2), no bias",
+        ),
+        pytest.param(
+            torch.nn.ConvTranspose2d(
+                8, 16, (1, 4), stride=(1, 2), padding=(0, 1), bias=False
+            ),
+            (1, 8, 1, 16),
+            id="In ch 8, out ch 16, kernel (1, 4), stride (1, 2), "
+            "padding (0, 1), no bias",
+        ),
+    ],
+)
+def test_conv_transpose2d_conversion__quantized(
+    mocker, model: torch.nn.Module, input_shape
+):
+    converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
+
+    edge_program = to_quantized_edge_program(model, input_shape).exported_program()
+
+    # Make sure the `TransposeConv` was delegated.
+    assert not graph_contains_any_of_ops(
+        graph=edge_program.graph, ops=[exir_ops.edge.aten.convolution.default]
+    )
+    assert any("lowered_module" in node.name for node in edge_program.graph.nodes)
+
+    # Capture generated model
+    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+
+    # Capture converted program
+    exported_program: ExportedProgram = converter_spy.call_args.args[1]
+
+    input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8)
+
+    convert_run_compare(
+        exported_program,
+        tflite_input_preprocess=ToChannelLastPreprocess(),
+        tfl_model=tflite_flatbuffers_model,
+        tflite_output_preprocess=ToChannelFirstPreprocess(),
+        input_data=input_data,
+        atol=1.0,
+    )
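Each rejected case in the next parametrization maps to one clause of the `_is_supported_on_target` checks at the top of this diff. Two of them spelled out (again assuming `num_macs = 8`):

```python
# "In ch 6" case: 6 % 8 != 0, so the input-channel clause alone disqualifies it.
assert 6 % 8 != 0

# "kernel (1, 5), stride (1, 4)" case: the stride width is neither kernel_w (5)
# nor kernel_w / 2 (2.5), and the kernel width is odd.
stride_w, kernel_w = 4, 5
assert stride_w != kernel_w and stride_w != kernel_w / 2
assert kernel_w % 2 != 0
```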
+
+
+@pytest.mark.parametrize(
+    "model, input_shape",
+    [
+        pytest.param(
+            torch.nn.ConvTranspose2d(8, 16, (1, 4), stride=(1, 2), dilation=(1, 2)),
+            (1, 8, 1, 16),
+            id="In ch 8, out ch 16, kernel (1, 4), stride (1, 2), "
+            "dilation (1, 2) - Dilation != (1, 1)",
+        ),
+        pytest.param(
+            torch.nn.ConvTranspose2d(6, 16, (1, 4), stride=(1, 2)),
+            (1, 6, 1, 16),
+            id="In ch 6, out ch 16, kernel (1, 4), stride (1, 2) - In channels % num_macs != 0",
+        ),
+        pytest.param(
+            torch.nn.ConvTranspose2d(8, 16, (1, 4), stride=(1, 2)),
+            (1, 8, 4, 16),
+            id="In ch 8, out ch 16, kernel (1, 4), stride (1, 2) - Out height != 1, stride width"
+            " != kernel width",
+        ),
+        pytest.param(
+            torch.nn.ConvTranspose2d(8, 16, (2, 4), stride=(1, 2), padding=(0, 1)),
+            (1, 8, 1, 16),
+            id="In ch 8, out ch 16, kernel (2, 4), stride (1, 2), padding "
+            "(0, 1) - Out height != 1, stride width != kernel width",
+        ),
+        pytest.param(
+            torch.nn.ConvTranspose2d(8, 16, (1, 5), stride=(1, 4)),
+            (1, 8, 1, 16),
+            id="In ch 8, out ch 16, kernel (1, 5), stride (1, 4) - Stride width != kernel width / 2"
+            ", stride width != kernel width",
+        ),
+        pytest.param(
+            torch.nn.ConvTranspose2d(16, 12, (1, 4), stride=(3, 3)),
+            (1, 16, 1, 16),
+            id="In ch 16, out ch 12, kernel "
+            "(1, 4), stride (3, 3) - Out channels % num_macs != 0",
+        ),
+        pytest.param(
+            torch.nn.ConvTranspose2d(64, 64, (1, 4), stride=(1, 2)),
+            (1, 64, 3, 12),
+            id="In ch 64, out ch 64, kernel (1, 4), stride (1, 2) - Out height != 1, stride width"
+            " != kernel width",
+        ),
+        pytest.param(
+            torch.nn.ConvTranspose2d(16, 40, (1, 4), stride=(1, 4), padding=(0, 1)),
+            (1, 16, 4, 27),
+            id="In ch 16, out ch 40, kernel (1, 4), stride (1, 4), padding (0, 1) - Padding width "
+            "!= 1 and input height != 1",
+        ),
+    ],
+)
+def test_conv_transpose2d_non_delegated_conversion__quantized(
+    model: torch.nn.Module, input_shape
+):
+    edge_program = to_quantized_edge_program(model, input_shape).exported_program()
+
+    nodes = list(edge_program.graph.nodes)
+    assert len(nodes) == 15
+    assert (
+        nodes[11].target.__name__ == "aten.convolution.default"
+    )  # TransposeConv not delegated.
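For reproducing any of these cases by hand, the shortest path is the helper the tests already use; the module path below is taken from the test imports and is otherwise an assumption:

```python
import torch
from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program

model = torch.nn.ConvTranspose2d(8, 16, (1, 4), stride=(1, 2))
edge_program_manager = to_quantized_edge_program(model, (1, 8, 1, 16))

# A fully delegated graph contains a lowered module instead of aten.convolution.
nodes = list(edge_program_manager.exported_program().graph.nodes)
assert any("lowered_module" in node.name for node in nodes)
```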