diff --git a/backends/nxp/neutron_node_extraction.py b/backends/nxp/neutron_node_extraction.py
index 10648b48849..2eb0f2d18c0 100644
--- a/backends/nxp/neutron_node_extraction.py
+++ b/backends/nxp/neutron_node_extraction.py
@@ -6,7 +6,6 @@
 from dataclasses import dataclass
 
 import numpy as np
-
 from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import (
     BuiltinOperator,
 )
@@ -15,6 +14,10 @@
 
 @dataclass
 class NeutronNodeArtifacts:
+    input_names: list[str]
+    input_indices: list[int]
+    output_names: list[str]
+    output_indices: list[int]
     microcode: np.ndarray
     weights: np.ndarray
     kernels: np.ndarray
@@ -99,4 +102,42 @@ def extract_artifacts_from_neutron_node(
         microcode.dtype == weights.dtype == kernels.dtype == np.dtype("uint8")
     ), "The Neutron Node uses unexpected data types."
 
-    return NeutronNodeArtifacts(microcode, weights, kernels)
+    input_names = []
+    input_indices = []
+    graph_inputs = sub_graph.InputsAsNumpy()
+    node_inputs = neutron_node.InputsAsNumpy()[:-3]
+    for tensor_idx in node_inputs:
+        which_graph_input = np.where(graph_inputs == tensor_idx)[0]
+        assert (
+            which_graph_input.size == 1
+        ), "Mismatch between Neutron Node inputs and graph inputs."
+        input_indices.append(which_graph_input[0])
+        input_names.append(sub_graph.Tensors(graph_inputs[which_graph_input[0]]).Name())
+
+    assert (
+        neutron_node.OutputsLength() >= 2
+    ), f"The Neutron Node only has `{neutron_node.OutputsLength()}` outputs. Expected at least `2` including the scratch buffer."
+
+    output_names = []
+    output_indices = []
+    graph_outputs = sub_graph.OutputsAsNumpy()
+    node_outputs = neutron_node.OutputsAsNumpy()[:-1]
+    for tensor_idx in node_outputs:
+        which_graph_output = np.where(graph_outputs == tensor_idx)[0]
+        assert (
+            which_graph_output.size == 1
+        ), "Mismatch between Neutron Node outputs and graph outputs."
+        output_indices.append(which_graph_output[0])
+        output_names.append(
+            sub_graph.Tensors(graph_outputs[which_graph_output[0]]).Name()
+        )
+
+    return NeutronNodeArtifacts(
+        input_names,
+        input_indices,
+        output_names,
+        output_indices,
+        microcode,
+        weights,
+        kernels,
+    )
diff --git a/backends/nxp/nxp_backend.py b/backends/nxp/nxp_backend.py
index 341cf23f885..dd7d64227e3 100644
--- a/backends/nxp/nxp_backend.py
+++ b/backends/nxp/nxp_backend.py
@@ -245,19 +245,23 @@ def _format_string_for_array(self, array: np.ndarray) -> str:
 
         return f"{array.size}s{self._padding_format_string_for_array(array)}"
 
-    def _create_payload_header(self, io_formats) -> np.ndarray:
+    def _create_payload_header(self, io_formats, neutron_artifacts) -> np.ndarray:
         """
        Create bytes header for returned payload. It contains information about input and output tensor formats.
        Tensors are ordered based on graph signature of ExportedProgram.
 
         Header schema:
-        +----------------------------------+-----------------------------------+
-        | Input TensorFormats length (1B)  | Output TensorFormats length (1B)  |
-        +----------------------------------+-----------------------------------+
-        | 1st input tensor format (1B)     | [nth* input tensor format (1B)]   |
-        +----------------------------------+-----------------------------------+
-        | 1st output tensor format (1B)    | [nth* output tensor format (1B)]  |
-        +----------------------------------+-----------------------------------+
+        +----------------------------+-----------------------------+------------------------+
+        | Neutron inputs length (1B) | Neutron outputs length (1B) | Input args length (1B) |
+        +----------------------------+-----------+-----------------+------------------------+
+        | 1st input tensor format (1B)           | [nth* input tensor format (1B)]          |
+        +----------------------------------------+------------------------------------------+
+        | 1st output tensor format (1B)          | [nth* output tensor format (1B)]         |
+        +----------------------------------------+------------------------------------------+
+        | 1st input map (1B)                     | [nth* input map (1B)]                    |
+        +----------------------------------------+------------------------------------------+
+        | 1st output map (1B)                    | [nth* output map (1B)]                   |
+        +----------------------------------------+------------------------------------------+
 
         :param io_formats: IO tensors formats.
         :return: Bytes representation of payload header.
@@ -265,19 +269,43 @@ def _create_payload_header(self, io_formats) -> np.ndarray:
         inputs = io_formats["inputs"]
         outputs = io_formats["outputs"]
 
-        assert len(inputs) < 256, "Models with more than 255 inputs are not supported."
         assert (
-            len(outputs) < 256
+            len(neutron_artifacts.input_indices) < 256
+        ), "Models with more than 255 inputs are not supported."
+        assert (
+            len(neutron_artifacts.output_indices) < 256
         ), "Models with more than 255 outputs are not supported."
 
-        header_data = [len(inputs)]
-        header_data.append(len(outputs))
+        header_data = [len(neutron_artifacts.input_indices)]
+        header_data.append(len(neutron_artifacts.output_indices))
+        header_data.append(len(inputs))
 
-        for _tensor, tensor_format in inputs.items():
-            header_data.append(1 if tensor_format == TensorFormat.CHANNELS_LAST else 0)
+        for input_name in neutron_artifacts.input_names:
+            try:
+                header_data.append(
+                    1
+                    if inputs[input_name.decode()] == TensorFormat.CHANNELS_LAST
+                    else 0
+                )
+            except KeyError:
+                raise AssertionError(
+                    f"Input tensor `{input_name.decode()}` not found in the converted model."
+                )
 
-        for _tensor, tensor_format in outputs.items():
-            header_data.append(1 if tensor_format == TensorFormat.CHANNELS_LAST else 0)
+        for output_name in neutron_artifacts.output_names:
+            try:
+                header_data.append(
+                    1
+                    if outputs[output_name.decode()] == TensorFormat.CHANNELS_LAST
+                    else 0
+                )
+            except KeyError:
+                raise AssertionError(
+                    f"Output tensor `{output_name.decode()}` not found in the converted model."
+                )
+
+        header_data.extend(neutron_artifacts.input_indices)
+        header_data.extend(neutron_artifacts.output_indices)
 
         # noinspection PyTypeChecker
         return np.array(header_data, dtype=np.uint8)
@@ -314,9 +342,9 @@ def get_binary_payload(self, io_formats, neutron_model) -> bytes:
         +----------------------------------------------------------------------------------------------------------------+
         |                                            16 bytes aligned blocks                                              |
-        +===========================+===========================+============================+===========================+
-        | Input formats length (1B) | Output formats length (1B) | [nth* input format (1B)]   | [nth* output format (1B)] |
-        +---------------------------+--------------------------- +---------------------------+---------------------------+
+        +================================================================================================================+
+        |                                                    Header                                                      |
+        +----------------------------------------------------------------------------------------------------------------+
         |                                               Neutron microcode                                                 |
         +----------------------------------------------------------------------------------------------------------------+
         |                                                Neutron weights                                                  |
         +----------------------------------------------------------------------------------------------------------------+
@@ -331,9 +359,9 @@ def get_binary_payload(self, io_formats, neutron_model) -> bytes:
         :param neutron_model: Neutron model with single NeutronGraph node.
         :return: 16 bytes aligned binary payload.
         """
-        header = self._create_payload_header(io_formats)
-
         # Extract the Neutron microcode, weights and kernels from the Neutron Node in the `neutron_model`.
         neutron_artifacts = extract_artifacts_from_neutron_node(neutron_model)
+        header = self._create_payload_header(io_formats, neutron_artifacts)
+
         return self._pack_with_alignment(header, neutron_artifacts)
 
diff --git a/backends/nxp/runtime/NeutronBackend.cpp b/backends/nxp/runtime/NeutronBackend.cpp
index 7b5278ebfd5..ef31054e933 100644
--- a/backends/nxp/runtime/NeutronBackend.cpp
+++ b/backends/nxp/runtime/NeutronBackend.cpp
@@ -25,37 +25,53 @@ namespace neutron {
 #define ALIGN_SIZE(size) \
   ((size + BUFFER_ALIGNMENT - 1) & (~(BUFFER_ALIGNMENT - 1)))
 
+// clang-format off
 /* Header schema:
-  +----------------------------------+-----------------------------------+
-  | Input TensorFormats length (1B)  | Output TensorFormats length (1B)  |
-  +----------------------------------+-----------------------------------+
-  | 1st input tensor format (1B)     | [nth* input tensor format (1B)]   |
-  +----------------------------------+-----------------------------------+
-  | 1st output tensor format (1B)    | [nth* output tensor format (1B)]  |
-  +----------------------------------+-----------------------------------+
+  +----------------------------+-----------------------------+------------------------+
+  | Neutron inputs length (1B) | Neutron outputs length (1B) | Input args length (1B) |
+  +----------------------------+-----------+-----------------+------------------------+
+  | 1st input tensor format (1B)           | [nth* input tensor format (1B)]          |
+  +----------------------------------------+------------------------------------------+
+  | 1st output tensor format (1B)          | [nth* output tensor format (1B)]         |
+  +----------------------------------------+------------------------------------------+
+  | 1st input map (1B)                     | [nth* input map (1B)]                    |
+  +----------------------------------------+------------------------------------------+
+  | 1st output map (1B)                    | [nth* output map (1B)]                   |
+  +----------------------------------------+------------------------------------------+
 */
+// clang-format on
 
 #define ITEM_SIZE 1 // 1 Byte
 #define INPUT_TENSOR_FORMAT_LEN_POS 0
 #define OUTPUT_TENSOR_FORMAT_LEN_POS 1
-#define INPUT_TENSOR_FORMAT_ARRAY_ADDR(base) (base + 2 * ITEM_SIZE)
+#define INPUT_ARGS_LEN_POS 2
+#define INPUT_TENSOR_FORMAT_ARRAY_ADDR(base) (base + 3 * ITEM_SIZE)
 #define OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(base) \
-  (base + 2 * ITEM_SIZE + base[INPUT_TENSOR_FORMAT_LEN_POS])
-#define PAYLOAD_ADDR(base) \
-  (base + \
-   ALIGN_SIZE( \
-       2 * ITEM_SIZE + base[INPUT_TENSOR_FORMAT_LEN_POS] + \
-       base[OUTPUT_TENSOR_FORMAT_LEN_POS]))
+  (base + 3 * ITEM_SIZE + base[INPUT_TENSOR_FORMAT_LEN_POS])
+#define INPUT_TENSOR_MAP_ARRAY_ADDR(base) \
+  (base + 3 * ITEM_SIZE + 1 * base[INPUT_TENSOR_FORMAT_LEN_POS] + \
+   1 * base[OUTPUT_TENSOR_FORMAT_LEN_POS])
+#define OUTPUT_TENSOR_MAP_ARRAY_ADDR(base) \
+  (base + 3 * ITEM_SIZE + 2 * base[INPUT_TENSOR_FORMAT_LEN_POS] + \
+   1 * base[OUTPUT_TENSOR_FORMAT_LEN_POS])
+#define PAYLOAD_ADDR(base) \
+  (base + \
+   ALIGN_SIZE( \
+       3 * ITEM_SIZE + 2 * base[INPUT_TENSOR_FORMAT_LEN_POS] + \
+       2 * base[OUTPUT_TENSOR_FORMAT_LEN_POS]))
 
 // Aggregate neutron model handle and data structures into one.
 typedef struct {
   int numInputs = 0;
   int numOutputs = 0;
+  int numInputArgs = 0;
   uint32_t scratchSize = 0;
   NeutronModelConfig mcfg;
   NeutronDataConfig dcfg;
   NeutronModelHandle nmh = NULL;
   const uint8_t* inputTranspositionFlags;
   const uint8_t* outputTranspositionFlags;
+  const uint8_t* inputMap;
+  const uint8_t* outputMap;
 } NeutronConfig;
 
 // Applied on outputs.
@@ -210,6 +226,15 @@ void transposeOutput(
   }
 }
 
+bool multipleChannelsPresent(const ArrayRef<exec_aten::SizesType>& sizes) {
+  size_t length = sizes.size();
+  if (length < 3) {
+    return true;
+  }
+  size_t C = sizes[length - 3];
+  return C != 1;
+}
+
 class NeutronBackend final : public PyTorchBackendInterface {
  public:
   NeutronBackend() {}
@@ -234,17 +259,19 @@ class NeutronBackend final : public PyTorchBackendInterface {
     // cfg->mcfg.microcode
     // cfg->mcfg.weights
     // cfg->mcfg.kernels
-    const uint8_t* transpositionFlags =
+    const uint8_t* payloadFlags =
         static_cast<const uint8_t*>(processed->data());
-    int numInputs = transpositionFlags[INPUT_TENSOR_FORMAT_LEN_POS];
-    int numOutputs = transpositionFlags[OUTPUT_TENSOR_FORMAT_LEN_POS];
-    cfg->inputTranspositionFlags =
-        INPUT_TENSOR_FORMAT_ARRAY_ADDR(transpositionFlags);
+    uint32_t numInputs = payloadFlags[INPUT_TENSOR_FORMAT_LEN_POS];
+    uint32_t numOutputs = payloadFlags[OUTPUT_TENSOR_FORMAT_LEN_POS];
+    cfg->numInputArgs = payloadFlags[INPUT_ARGS_LEN_POS];
+    cfg->inputTranspositionFlags = INPUT_TENSOR_FORMAT_ARRAY_ADDR(payloadFlags);
     cfg->outputTranspositionFlags =
-        OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(transpositionFlags);
+        OUTPUT_TENSOR_FORMAT_ARRAY_ADDR(payloadFlags);
+    cfg->inputMap = INPUT_TENSOR_MAP_ARRAY_ADDR(payloadFlags);
+    cfg->outputMap = OUTPUT_TENSOR_MAP_ARRAY_ADDR(payloadFlags);
 
     const uint32_t* buffer = static_cast<const uint32_t*>(
-        static_cast<const void*> PAYLOAD_ADDR(transpositionFlags));
+        static_cast<const void*> PAYLOAD_ADDR(payloadFlags));
     uint32_t magicWord = buffer[0];
     // Check valid microcode.
     if (magicWord != 0x64434D6E) {
@@ -314,39 +341,37 @@ class NeutronBackend final : public PyTorchBackendInterface {
     cfg->dcfg.outputs[cfg->numOutputs] =
        static_cast<void*>(context.allocate(cfg->scratchSize, 16));
 
-    // Set inputs and outputs from args.
+    // Set inputs from args.
+    // Transpose inputs if needed.
     for (int i = 0; i < cfg->numInputs; i++) {
-      cfg->dcfg.inputs[i] = args[i]->toTensor().const_data_ptr();
-    }
-    for (int i = 0; i < cfg->numOutputs; i++) {
-      cfg->dcfg.outputs[i] =
-          args[cfg->numInputs + i]->toTensor().mutable_data_ptr();
-    }
-
-    // Transpose inputs.
-    for (int i = 0; i < cfg->numInputs; i++) {
-      if (cfg->inputTranspositionFlags[i]) {
-        if (args[i]->toTensor().sizes().size() < 3) {
+      auto arg = args[cfg->inputMap[i]]->toTensor();
+      if (cfg->inputTranspositionFlags[i] &&
+          multipleChannelsPresent(arg.sizes())) {
+        if (arg.sizes().size() < 3) {
           ET_LOG(Error, "Unable to transpose 1D and 2D input to channel last");
           return Error::InvalidProgram;
         }
         // Allocate buffer, the allocator is reset after each PTE instruction.
-        void* buffer = context.allocate(args[i]->toTensor().nbytes(), 16);
+        void* buffer = context.allocate(arg.nbytes());
         transposeInput(
-            args[i]->toTensor().const_data_ptr(),
-            buffer,
-            args[i]->toTensor().sizes(),
-            args[i]->toTensor().element_size());
+            arg.const_data_ptr(), buffer, arg.sizes(), arg.element_size());
         cfg->dcfg.inputs[i] = buffer;
+      } else {
+        cfg->dcfg.inputs[i] = arg.const_data_ptr();
       }
     }
-    // Redirect outputs.
+
+    // Set outputs from args.
+    // Redirect outputs if needed before transposition.
     for (int i = 0; i < cfg->numOutputs; i++) {
-      if (cfg->outputTranspositionFlags[i]) {
+      auto arg = args[cfg->numInputArgs + cfg->outputMap[i]]->toTensor();
+      if (cfg->outputTranspositionFlags[i] &&
+          multipleChannelsPresent(arg.sizes())) {
         // Allocate buffer, the allocator is reset after each PTE instruction.
-        void* buffer =
-            context.allocate(args[cfg->numInputs + i]->toTensor().nbytes(), 16);
+        void* buffer = context.allocate(arg.nbytes());
         cfg->dcfg.outputs[i] = buffer;
+      } else {
+        cfg->dcfg.outputs[i] = arg.mutable_data_ptr();
       }
     }
 
@@ -368,17 +393,19 @@ class NeutronBackend final : public PyTorchBackendInterface {
 
     // Transpose outputs.
     for (int i = 0; i < cfg->numOutputs; i++) {
-      if (cfg->outputTranspositionFlags[i]) {
-        if (args[cfg->numInputs + i]->toTensor().sizes().size() < 3) {
+      auto arg = args[cfg->numInputArgs + cfg->outputMap[i]]->toTensor();
+      if (cfg->outputTranspositionFlags[i] &&
+          multipleChannelsPresent(arg.sizes())) {
+        if (arg.sizes().size() < 3) {
           ET_LOG(
               Error, "Unable to transpose 1D and 2D output to channel first");
           return Error::InvalidProgram;
         }
         transposeOutput(
             cfg->dcfg.outputs[i],
-            args[cfg->numInputs + i]->toTensor().mutable_data_ptr(),
-            args[cfg->numInputs + i]->toTensor().sizes(),
-            args[cfg->numInputs + i]->toTensor().element_size());
+            arg.mutable_data_ptr(),
+            arg.sizes(),
+            arg.element_size());
       }
     }
 
diff --git a/backends/nxp/tests/test_neutron_backend.py b/backends/nxp/tests/test_neutron_backend.py
index 963aea78b4f..53e54ec2f56 100644
--- a/backends/nxp/tests/test_neutron_backend.py
+++ b/backends/nxp/tests/test_neutron_backend.py
@@ -27,11 +27,14 @@ def test_neutron_backend__single_conv_model__payload_header_channels_last():
         edge_program_manager.exported_program().graph_module.lowered_module_0.processed_bytes
     )
 
-    assert payload[0] == 0x1  # Single input
-    assert payload[1] == 0x1  # Single output
-    assert payload[2] == 0x1  # Channels last
-    assert payload[3] == 0x1  # Channels last
-    assert all(byte == 0x0 for byte in payload[4:16])  # Aligned to 16 bytes
+    assert payload[0] == 0x1  # Number of Neutron node inputs
+    assert payload[1] == 0x1  # Number of Neutron node outputs
+    assert payload[2] == 0x1  # Number of model inputs
+    assert payload[3] == 0x1  # Channels last 0-th Neutron input
+    assert payload[4] == 0x1  # Channels last 0-th Neutron output
+    assert payload[5] == 0x0  # Map 0-th Neutron input to 0-th model input
+    assert payload[6] == 0x0  # Map 0-th Neutron output to 0-th model output
+    assert all(byte == 0x0 for byte in payload[7:16])  # Aligned to 16 bytes
     assert payload[17] != 0x0  # Followed by non-zero content
 
 
@@ -41,9 +44,12 @@ def test_neutron_backend__linear_softmax_model__payload_header_formatless():
         edge_program_manager.exported_program().graph_module.lowered_module_0.processed_bytes
     )
 
-    assert payload[0] == 0x1  # Single input
-    assert payload[1] == 0x1  # Single output
-    assert payload[2] == 0x0  # Formatless
-    assert payload[3] == 0x0  # Formatless
-    assert all(byte == 0x0 for byte in payload[4:16])  # Aligned to 16 bytes
+    assert payload[0] == 0x1  # Number of Neutron node inputs
+    assert payload[1] == 0x1  # Number of Neutron node outputs
+    assert payload[2] == 0x1  # Number of model inputs
+    assert payload[3] == 0x0  # Formatless 0-th Neutron input
+    assert payload[4] == 0x0  # Formatless 0-th Neutron output
+    assert payload[5] == 0x0  # Map 0-th Neutron input to 0-th model input
+    assert payload[6] == 0x0  # Map 0-th Neutron output to 0-th model output
+    assert all(byte == 0x0 for byte in payload[7:16])  # Aligned to 16 bytes
    assert payload[17] != 0x0  # Followed by non-zero content
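
Reviewer note: the sketch below is a small, self-contained model of the new payload header layout that `_create_payload_header` emits and the `*_ADDR` macros in `NeutronBackend.cpp` consume. The helper names (`build_header`, `parse_header`, `ALIGNMENT`) are illustrative only and are not part of the backend code; the sketch just shows how the three length bytes, the per-tensor format flags, and the input/output maps line up, padded to the 16-byte boundary, and reproduces the byte values asserted in the single-conv test.

import numpy as np

ALIGNMENT = 16  # matches BUFFER_ALIGNMENT / ALIGN_SIZE() on the runtime side


def build_header(input_formats, output_formats, input_map, output_map, num_model_inputs):
    # Mirrors _create_payload_header: three length bytes, then per-tensor
    # format flags (1 = channels last), then the Neutron -> model index maps.
    assert len(input_formats) == len(input_map) < 256
    assert len(output_formats) == len(output_map) < 256
    data = [len(input_map), len(output_map), num_model_inputs]
    data += [1 if fmt == "channels_last" else 0 for fmt in input_formats]
    data += [1 if fmt == "channels_last" else 0 for fmt in output_formats]
    data += list(input_map) + list(output_map)
    header = np.array(data, dtype=np.uint8)
    # Zero-pad to a 16-byte boundary before the microcode block follows.
    return np.pad(header, (0, -header.size % ALIGNMENT))


def parse_header(header):
    # Mirrors the C macros: the format arrays start at byte 3, the maps follow
    # the two format arrays, and PAYLOAD_ADDR is the aligned end of the header.
    n_in, n_out, n_args = int(header[0]), int(header[1]), int(header[2])
    base = 3
    in_formats = header[base : base + n_in]
    out_formats = header[base + n_in : base + n_in + n_out]
    in_map = header[base + n_in + n_out : base + 2 * n_in + n_out]
    out_map = header[base + 2 * n_in + n_out : base + 2 * n_in + 2 * n_out]
    payload_offset = -(-(base + 2 * n_in + 2 * n_out) // ALIGNMENT) * ALIGNMENT
    return in_formats, out_formats, in_map, out_map, n_args, payload_offset


# Single-conv case from test_neutron_backend.py: one channels-last input mapped
# to model input 0 gives header bytes 01 01 01 01 01 00 00, padded to 16 bytes.
header = build_header(["channels_last"], ["channels_last"], [0], [0], 1)
assert list(header[:7]) == [1, 1, 1, 1, 1, 0, 0] and header.size == 16
assert parse_header(header)[-1] == 16  # microcode starts right after the header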
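A similarly rough NumPy model of what the runtime's execute() path now does with the maps and flags on the input side: each Neutron input is fetched from the PTE argument named by `inputMap[i]`, and it is transposed NCHW to NHWC only when the format flag is set and the tensor actually has more than one channel (the `multipleChannelsPresent` check). Function and variable names here are mine; the real code works on raw pointers, rejects tensors with fewer than 3 dimensions that are flagged channels-last instead of passing them through, and on the output side indexes the argument as `numInputArgs + outputMap[i]` and transposes back after the Neutron graph completes.

import numpy as np


def bind_neutron_inputs(args, input_map, channels_last_flags):
    # Pick each Neutron input via the input map and convert NCHW -> NHWC only
    # when it is flagged channels-last and has more than one channel.
    bound = []
    for i, arg_idx in enumerate(input_map):
        t = args[arg_idx]
        needs_transpose = (
            channels_last_flags[i] == 1 and t.ndim >= 3 and t.shape[-3] != 1
        )
        if needs_transpose:
            t = np.moveaxis(t, -3, -1)  # channel dim becomes the innermost dim
        bound.append(np.ascontiguousarray(t))
    return bound


# One 4D input, flagged channels-last, mapped to model argument 0.
args = [np.zeros((2, 3, 8, 8), dtype=np.int8)]
neutron_inputs = bind_neutron_inputs(args, input_map=[0], channels_last_flags=[1])
assert neutron_inputs[0].shape == (2, 8, 8, 3)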