From d6b9d614de51e2e2d45cb499307b55cda1415a6d Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Mon, 7 Jul 2025 21:06:32 -0700 Subject: [PATCH] Update [ghstack-poisoned] --- backends/test/harness/tester.py | 1 + backends/test/operators/facto_specs.py | 16 +- backends/test/operators/test_facto.py | 127 ++++++----- backends/test/runner/CMakeLists.txt | 16 ++ backends/test/runner/test_runner.cpp | 260 +++++++++++++++++++++++ backends/xnnpack/test/tester/__init__.py | 7 +- pytest.ini | 5 + 7 files changed, 359 insertions(+), 73 deletions(-) create mode 100644 backends/test/runner/CMakeLists.txt create mode 100644 backends/test/runner/test_runner.cpp diff --git a/backends/test/harness/tester.py b/backends/test/harness/tester.py index f1dfeb23531..3f717d824bc 100644 --- a/backends/test/harness/tester.py +++ b/backends/test/harness/tester.py @@ -361,6 +361,7 @@ def _assert_outputs_equal(model_output, ref_output, atol=1e-03, rtol=1e-03): ref, atol=atol, rtol=rtol, + equal_nan=True, ), ( f"Output {i} does not match reference output.\n" f"\tGiven atol: {atol}, rtol: {rtol}.\n" diff --git a/backends/test/operators/facto_specs.py b/backends/test/operators/facto_specs.py index 96fe86b2ea7..3427c302f6a 100644 --- a/backends/test/operators/facto_specs.py +++ b/backends/test/operators/facto_specs.py @@ -2,14 +2,20 @@ import torch from facto.inputgen.argument.type import ArgType -from facto.inputgen.specs.model import ConstraintProducer as cp, InPosArg, OutArg, Spec +from facto.inputgen.specs.model import ( + ConstraintProducer as cp, + InKwArg, + InPosArg, + OutArg, + Spec, +) """ This file contains FACTO operator specs for ops not in the standard FACTO db. This mainly includes ops not in the Core ATen op set and preserved by a backend, such as linear. """ -LiNEAR_DEFAULT_SPEC = Spec( +LINEAR_DEFAULT_SPEC = Spec( op="linear.default", # (Tensor input, Tensor weight, Tensor? bias=None) -> Tensor inspec=[ InPosArg( @@ -53,7 +59,9 @@ ) _extra_specs = [ - LiNEAR_DEFAULT_SPEC, + LINEAR_DEFAULT_SPEC, ] -ExtraSpecDB: dict[str, Spec] = {s.op: s for s in _extra_specs} +ExtraSpecDB: dict[str, Spec] = { + s.op: s for s in _extra_specs +} \ No newline at end of file diff --git a/backends/test/operators/test_facto.py b/backends/test/operators/test_facto.py index 208aaa042a9..ec4459f6086 100644 --- a/backends/test/operators/test_facto.py +++ b/backends/test/operators/test_facto.py @@ -4,29 +4,21 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -# pyre-unsafe - -# -# This file contains logic to run generated operator tests using the FACTO -# library (https://github.com/pytorch-labs/FACTO). To run the tests, first -# clone and install FACTO by running pip install . from the FACTO source -# directory. 
Then, from the executorch root directory, run the following: -# -# python -m unittest backends.test.operators.test_facto.FactoTestsXNNPACK -# +# pyre-strict import copy import functools import traceback +from typing import Any, Callable, List, OrderedDict, Sequence, Tuple import unittest -from typing import Any, Callable, Sequence import torch from executorch.backends.test.harness.tester import Tester as TesterBase -from executorch.backends.xnnpack.test.tester.tester import Tester as XnnpackTester +from executorch.backends.xnnpack.test.tester.tester import ToEdgeTransformAndLower, Tester as XnnpackTester from facto.inputgen.argtuple.gen import ArgumentTupleGenerator -from facto.inputgen.specs.model import ConstraintProducer as cp, Spec +from facto.inputgen.specs.model import Constraint, ConstraintProducer as cp, Spec from facto.inputgen.utils.random_manager import random_manager +from facto.inputgen.variable.type import ScalarDtype from facto.specdb.db import SpecDictDB from torch._ops import OpOverload @@ -35,9 +27,9 @@ CombinedSpecDB = SpecDictDB | ExtraSpecDB COMMON_TENSOR_CONSTRAINTS = [ - cp.Rank.Ge(lambda deps: 1), # Avoid zero and high rank tensors. + cp.Rank.Ge(lambda deps: 1), cp.Rank.Le(lambda deps: 4), - cp.Size.Ge(lambda deps, r, d: 1), # Keep sizes reasonable. + cp.Size.Ge(lambda deps, r, d: 1), cp.Size.Le(lambda deps, r, d: 2**9), ] @@ -54,7 +46,6 @@ "other", } - def _patch_spec(spec: Spec) -> Spec: spec = copy.deepcopy(spec) for inspec in spec.inspec: @@ -64,18 +55,16 @@ def _patch_spec(spec: Spec) -> Spec: inspec.constraints.extend(COMMON_SCALAR_CONSTRAINS) return spec - class OpModel(torch.nn.Module): """ Wraps a single torch operator in an nn.Module. """ - def __init__( - self, - op: OpOverload, - runtime_input_count: int, + self, + op: OpOverload, + runtime_input_count: int, fixed_args: Sequence[Any], - fixed_kwargs: dict[str, Any], + fixed_kwargs: dict[str, Any] ): super().__init__() self.op = op @@ -99,12 +88,9 @@ def __init__( def forward(self, *args, **kwargs): return self.op(*(args + self.fixed_args), **(kwargs | self.fixed_kwargs)) - class ConvModel(OpModel): def forward(self, *args, **kwargs): - weight, bias, stride, padding, dilation, transposed, output_padding, groups = ( - self.fixed_args - ) + weight, bias, stride, padding, dilation, transposed, output_padding, groups = self.fixed_args if not transposed: if len(weight.shape) == 3: @@ -113,7 +99,7 @@ def forward(self, *args, **kwargs): op = torch.nn.functional.conv2d elif len(weight.shape) == 5: op = torch.nn.functional.conv3d - + return op(args[0], weight, bias, stride, padding, dilation, groups) else: if len(weight.shape) == 3: @@ -122,11 +108,8 @@ def forward(self, *args, **kwargs): op = torch.nn.functional.conv_transpose2d elif len(weight.shape) == 5: op = torch.nn.functional.conv_transpose3d - - return op( - args[0], weight, bias, stride, padding, output_padding, groups, dilation - ) - + + return op(args[0], weight, bias, stride, padding, output_padding, groups, dilation) def get_module_for_op(op: OpOverload): if op == torch.ops.aten.convolution.default: @@ -134,7 +117,6 @@ def get_module_for_op(op: OpOverload): else: return OpModel - class FactoTestsBase(unittest.TestCase): def __init__(self, tester_factory: Callable[[], TesterBase], *args, **kwargs): super().__init__(*args, **kwargs) @@ -147,37 +129,36 @@ def _generate_test(op_name: str) -> None: torch_op = functools.reduce(getattr, sections, torch.ops.aten) test_name = "test_" + op_name.replace(".", "_") - - def test_body(self): - 
self._test_op(torch_op)
+        test_body = lambda self: self._test_op(torch_op)
 
         setattr(FactoTestsBase, test_name, test_body)
 
     @staticmethod
     def get_runtime_input_count(spec: Spec):
         # Determine which inputs are fixed at tracing time (weights, for example),
         # vs inputs to the runtime graph. We currently assume that the runtime graph
         # inputs start at the beginning of the arg list and are contiguous.
         #
         # Args are considered to be runtime inputs if they are positional and are named
         # one of RUNTIME_INPUT_NAMES. If none match, we assume only the first arg is a
         # runtime input.
         runtime_input_count = 0
         for inspec in spec.inspec:
             is_runtime_input = (
-                inspec.type.is_tensor() and inspec.name.lower() in RUNTIME_INPUT_NAMES
+                inspec.type.is_tensor() and
+                inspec.name.lower() in RUNTIME_INPUT_NAMES
             )
             if is_runtime_input:
                 runtime_input_count += 1
             else:
                 break
 
         return max(1, runtime_input_count)
 
     def setUp(self):
         torch.set_printoptions(threshold=3)
 
-    def _test_op(self, op: OpOverload) -> None:  # noqa: C901
+    def _test_op(self, op: OpOverload) -> None:
         random_manager.seed(0)
 
         # Strip namespace
         op_name = op.name().split("::")[-1]
@@ -186,15 +167,15 @@ def _test_op(self, op: OpOverload) -> None:  # noqa: C901
 
         # Default to .default overload
         if "." not in op_name:
             op_name += ".default"
 
         # Find and patch op spec
-        if op_name not in CombinedSpecDB:
+        if not op_name in CombinedSpecDB:
             raise ValueError(f"Operator {op_name} not found in SpecDictDB.")
         spec = _patch_spec(CombinedSpecDB[op_name])
 
         runtime_input_count = FactoTestsBase.get_runtime_input_count(spec)
 
         print(f"Op: {op_name}, {runtime_input_count} runtime inputs")
 
         # Run test cases
         success_count_delegated = 0
@@ -207,14 +188,18 @@ def _test_op(self, op: OpOverload) -> None:  # noqa: C901
 
             try:
                 if isinstance(posargs[0], torch.Tensor):
-                    # Temporary for getting around XNN crashes
-                    if posargs[0].dtype not in {torch.float32, torch.float16}:
-                        print("SKIPPING NON FLOAT CASE")
+                    # Temporary for getting around XNN crashes (https://github.com/pytorch/executorch/issues/10960).
+                    # TODO: Re-enable when resolved.
+                    if posargs[0].dtype in {torch.int8, torch.uint8}:
+                        print("Skipping (u)int8 case.")
                         continue
 
                 module_cls = get_module_for_op(op)
                 model = module_cls(
-                    op, runtime_input_count, posargs[runtime_input_count:], inkwargs
+                    op,
+                    runtime_input_count,
+                    posargs[runtime_input_count:],
+                    inkwargs
                 )
 
                 # Sanity check to make sure it runs in eager. This can present nicer error
@@ -225,13 +210,20 @@ def _test_op(self, op: OpOverload) -> None:  # noqa: C901
                     print(f"Eager execution failed: {e}")
                     continue
 
-                tester = (
-                    self._tester_factory(model, tuple(posargs[:runtime_input_count]))
-                    .export()
-                    .dump_artifact()
-                    .to_edge_transform_and_lower()
+                tester = self._tester_factory(
+                    model,
+                    tuple(posargs[:runtime_input_count])
                 )
 
+                # Dynamo will also fail to handle some patterns that are valid in eager.
+                try:
+                    tester.export()
+                except Exception as e:
+                    print(f"Export failed: {e}")
+                    continue
+
+                tester.to_edge_transform_and_lower()
+
                 is_delegated = any(
                     n.target == torch._higher_order_ops.executorch_call_delegate
                     for n in tester.stages[tester.cur].graph_module.graph.nodes
@@ -241,19 +233,20 @@ def _test_op(self, op: OpOverload) -> None:  # noqa: C901
 
                 # Only run the runtime test if the op was delegated.
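+                # If nothing was delegated there is no backend behavior to exercise, so skip
+                # the (slow) serialize-and-run step and just record the case as undelegated.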
                 if is_delegated:
                     (
-                        tester.to_executorch()
+                        tester
+                        .to_executorch()
                         .serialize()
                         .run_method_and_compare_outputs()
                     )
 
                 if is_delegated:
                     success_count_delegated += 1
                 else:
                     success_count_undelegated += 1
             except Exception as e:
                 fail_count += 1
-                print(f"Error: {e}")
-                print("Args:")
+                print(f"Args:")
                 for arg in posargs:
                     if isinstance(arg, torch.Tensor):
                         print(f"  {arg.dtype} {arg.shape}")
@@ -262,20 +255,22 @@ def _test_op(self, op: OpOverload) -> None:  # noqa: C901
 
                 traceback.print_exc()
 
-        print(
-            f"{success_count_delegated + success_count_undelegated} PASS, {fail_count} FAIL"
-        )
-        print(
-            f"  {success_count_delegated} DELEGATED, {success_count_undelegated} UNDELEGATED"
-        )
-
+        print(f"{success_count_delegated + success_count_undelegated} PASS, {fail_count} FAIL")
+        print(f"  {success_count_delegated} DELEGATED, {success_count_undelegated} UNDELEGATED")
 
 # Programmatically generate tests for each operator.
 for op_name in CombinedSpecDB.keys():
     FactoTestsBase._generate_test(op_name)
 
-
 # TODO Figure out where to put these
 class FactoTestsXNNPACK(FactoTestsBase):
     def __init__(self, *args, **kwargs):
         super().__init__(XnnpackTester, *args, **kwargs)
+
+
+try:
+    from executorch.backends.apple.coreml.test.tester import CoreMLTester
+
+    class FactoTestsCoreML(FactoTestsBase):
+        def __init__(self, *args, **kwargs):
+            super().__init__(CoreMLTester, *args, **kwargs)
+except ImportError:
+    print("Skipping Core ML FACTO tests as Core ML AOT is not available.")
diff --git a/backends/test/runner/CMakeLists.txt b/backends/test/runner/CMakeLists.txt
new file mode 100644
index 00000000000..d0ee29f8d6a
--- /dev/null
+++ b/backends/test/runner/CMakeLists.txt
@@ -0,0 +1,16 @@
+add_executable(executorch-test-runner
+    test_runner.cpp
+    # TODO
+    ../../../runtime/platform/runtime.cpp
+)
+
+target_link_libraries(
+    executorch-test-runner
+    PRIVATE executorch
+            gflags
+            extension_flat_tensor
+            extension_flat_tensor_serialize
+            extension_module
+            extension_tensor
+            optimized_native_cpu_ops_lib
+            xnnpack_backend)
diff --git a/backends/test/runner/test_runner.cpp b/backends/test/runner/test_runner.cpp
new file mode 100644
index 00000000000..e17a4f91a55
--- /dev/null
+++ b/backends/test/runner/test_runner.cpp
@@ -0,0 +1,260 @@
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include <executorch/extension/data_loader/file_data_loader.h>
+#include <executorch/extension/flat_tensor/flat_tensor_data_map.h>
+#include <executorch/extension/flat_tensor/serialize/serialize.h>
+#include <executorch/extension/module/module.h>
+#include <executorch/extension/tensor/tensor.h>
+#include <executorch/runtime/platform/runtime.h>
+
+#include <gflags/gflags.h>
+
+/*
+ * This runner is intended to be built and run as part of the backend test flow. It takes a
+ * set of inputs from a flat_tensor-format file, runs each case, and then serializes the
+ * outputs to a file, also in flat_tensor format.
+ */
+
+DEFINE_string(
+    model_path,
+    "model.pte",
+    "Model serialized in flatbuffer format.");
+
+DEFINE_string(
+    input_path,
+    "inputs.ptd",
+    "Input tensors in flat tensor (ptd) format.");
+
+DEFINE_string(
+    output_path,
+    "outputs.ptd",
+    "Path to write output tensors in flat tensor (ptd) format.");
+
+DEFINE_string(
+    method,
+    "forward",
+    "The model method to run.");
+
+using executorch::aten::Tensor;
+using executorch::runtime::Error;
+using executorch::runtime::EValue;
+using executorch::runtime::Result;
+using executorch::extension::FileDataLoader;
+using executorch::extension::FlatTensorDataMap;
+using executorch::extension::Module;
+using executorch::extension::TensorPtr;
+using executorch::ET_RUNTIME_NAMESPACE::TensorLayout;
+
+// Contains method inputs for a single run.
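+// Inputs are keyed by positional index; keys in the input .ptd file are expected to have the
+// form "<test case name>:<input index>" (see parse_key below).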
+struct TestCase {
+  std::map<int, TensorPtr> inputs;
+};
+
+std::map<std::string, TestCase> collect_test_cases(FlatTensorDataMap& input_map);
+TensorPtr create_tensor(TensorLayout& layout, std::unique_ptr<char, decltype(&free)> buffer);
+Result<FlatTensorDataMap> load_input_data(FileDataLoader& loader);
+std::optional<std::tuple<std::string, int>> parse_key(const std::string& key);
+Result<std::vector<EValue>> run_test_case(Module& module, TestCase& test_case);
+void store_outputs(
+    std::map<std::string, TensorPtr>& output_map,
+    const std::string& case_name,
+    const std::vector<EValue>& outputs);
+
+const int TensorAlignment = 16;
+
+int main(int argc, char** argv) {
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  executorch::runtime::runtime_init();
+
+  // Load the model.
+  Module model(FLAGS_model_path.c_str());
+  auto load_method_error = model.load_method(FLAGS_method.c_str());
+  if (load_method_error != Error::Ok) {
+    std::cerr << "Failed to load method \"" << FLAGS_method << "\": "
+              << static_cast<int>(load_method_error) << std::endl;
+    return -1;
+  }
+
+  // Load the input tensor data. Note that the data loader has to live as long as the flat
+  // tensor data map does.
+  auto input_loader_result = FileDataLoader::from(FLAGS_input_path.c_str());
+  if (!input_loader_result.ok()) {
+    std::cerr << "Failed to open input file: error "
+              << static_cast<int>(input_loader_result.error()) << std::endl;
+    return -1;
+  }
+
+  auto load_result = load_input_data(*input_loader_result);
+  if (!load_result.ok()) {
+    return -1;
+  }
+  auto input_map = std::move(load_result.get());
+
+  auto cases = collect_test_cases(input_map);
+  std::map<std::string, TensorPtr> output_map;
+
+  // Run each case and store the outputs.
+  for (auto& [name, test_case] : cases) {
+    auto result = run_test_case(model, test_case);
+    if (!result.ok()) {
+      std::cerr << "Failed to run test case \"" << name << "\": "
+                << static_cast<int>(result.error()) << std::endl;
+      return -1;
+    }
+
+    store_outputs(output_map, name, result.get());
+  }
+
+  // Create a map of Tensor (unowned), rather than TensorPtr (owned).
+  std::map<std::string, Tensor> output_map_tensors;
+  for (auto& [key, value] : output_map) {
+    output_map_tensors.emplace(key, *value);
+  }
+
+  // Write the output data in .ptd format.
+  auto save_result = executorch::extension::flat_tensor::save_ptd(
+      FLAGS_output_path.c_str(),
+      output_map_tensors,
+      TensorAlignment);
+
+  if (save_result != Error::Ok) {
+    std::cerr << "Failed to save outputs: " << static_cast<int>(save_result) << std::endl;
+    return -1;
+  }
+
+  std::cout << "Successfully wrote output tensors to " << FLAGS_output_path << "." << std::endl;
+  return 0;
+}
+
+// Group inputs by test case and build tensors.
+std::map<std::string, TestCase> collect_test_cases(FlatTensorDataMap& input_map) {
+  std::map<std::string, TestCase> cases;
+
+  for (auto i = 0u; i < input_map.get_num_keys().get(); i++) {
+    auto key = input_map.get_key(i).get();
+
+    // Split the key into test case name and input index.
+    auto [test_case_name, input_index] = *parse_key(key);
+
+    // Get or create the test case instance.
+    auto& test_case = cases[test_case_name];
+
+    // Create a tensor from the layout and data.
+    auto tensor_layout = input_map.get_tensor_layout(key).get();
+    auto tensor_data = std::unique_ptr<char, decltype(&free)>(
+        static_cast<char*>(malloc(tensor_layout.nbytes())), free);
+    auto load_result = input_map.load_data_into(key, tensor_data.get(), tensor_layout.nbytes());
+    if (load_result != Error::Ok) {
+      std::cerr << "Load failed: " << static_cast<int>(load_result) << std::endl;
+      exit(-1);
+    }
+
+    auto input_tensor = create_tensor(tensor_layout, std::move(tensor_data));
+    test_case.inputs[input_index] = std::move(input_tensor);
+  }
+
+  return cases;
+}
+
+// Create a tensor from a layout and data blob.
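+// The returned TensorPtr takes ownership of the data: the buffer is released from the
+// unique_ptr and freed by the custom deleter passed to make_tensor_ptr.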
+TensorPtr create_tensor(TensorLayout& layout, std::unique_ptr<char, decltype(&free)> buffer) {
+  // Sizes and dim order have different types in TensorLayout vs Tensor.
+  std::vector<executorch::aten::SizesType> sizes;
+  for (auto x : layout.sizes()) {
+    sizes.push_back(x);
+  }
+  std::vector<executorch::aten::DimOrderType> dim_order;
+  for (auto x : layout.dim_order()) {
+    dim_order.push_back(x);
+  }
+
+  auto raw_data = buffer.release();
+
+  return executorch::extension::make_tensor_ptr(
+      sizes,
+      raw_data,
+      dim_order,
+      {}, // Strides - infer from sizes + dim order.
+      layout.scalar_type(),
+      exec_aten::TensorShapeDynamism::STATIC,
+      [](void* ptr) {
+        free(ptr);
+      });
+}
+
+// Load the input data (in .ptd file format) from the given path.
+Result<FlatTensorDataMap> load_input_data(FileDataLoader& loader) {
+  auto input_data_map_load_result = FlatTensorDataMap::load(&loader);
+  if (!input_data_map_load_result.ok()) {
+    std::cerr << "Failed to load input data map: error "
+              << static_cast<int>(input_data_map_load_result.error()) << std::endl;
+  }
+
+  return input_data_map_load_result;
+}
+
+// Parse a string key of the form "test_case:input index". Returns a tuple of the test case name
+// and input index.
+std::optional<std::tuple<std::string, int>> parse_key(const std::string& key) {
+  auto delimiter = key.find(":");
+  if (delimiter == std::string::npos) {
+    return std::nullopt;
+  }
+
+  auto test_case = key.substr(0, delimiter);
+  auto index_str = key.substr(delimiter + 1);
+  auto index = std::stoi(index_str);
+
+  return {{test_case, index}};
+}
+
+// Run a given test case and return the resulting output values.
+Result<std::vector<EValue>> run_test_case(Module& module, TestCase& test_case) {
+  for (auto& [index, value] : test_case.inputs) {
+    auto set_input_error = module.set_input(FLAGS_method, value, index);
+    if (set_input_error != Error::Ok) {
+      std::cerr << "Failed to set input " << index << ": "
+                << static_cast<int>(set_input_error) << "." << std::endl;
+    }
+  }
+
+  return module.execute(FLAGS_method.c_str());
+}
+
+// Store output tensors into the named data map.
+void store_outputs(
+    std::map<std::string, TensorPtr>& output_map,
+    const std::string& case_name,
+    const std::vector<EValue>& outputs) {
+  // Because the outputs are likely memory planned, we need to clone the tensor
+  // here to avoid having the data clobbered by the next run.
+
+  for (auto i = 0u; i < outputs.size(); i++) {
+    if (!outputs[i].isTensor()) {
+      continue;
+    }
+
+    auto key_name = case_name + ":" + std::to_string(i);
+    auto& tensor = outputs[i].toTensor();
+
+    // Copy tensor storage.
+    auto tensor_memory = malloc(tensor.nbytes());
+    memcpy(tensor_memory, tensor.const_data_ptr(), tensor.nbytes());
+
+    // Copy tensor metadata.
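+    // The sizes and dim order spans point into the module's memory-planned tensor, so copy
+    // them into std::vectors that the cloned output tensor owns alongside the copied storage.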
+    std::vector<executorch::aten::SizesType> sizes(
+        tensor.sizes().begin(),
+        tensor.sizes().end()
+    );
+
+    std::vector<executorch::aten::DimOrderType> dim_order(
+        tensor.dim_order().begin(),
+        tensor.dim_order().end()
+    );
+
+    output_map.emplace(key_name, executorch::extension::make_tensor_ptr(
+        sizes,
+        tensor_memory,
+        dim_order,
+        {}, // Strides - implicit.
+        tensor.scalar_type(),
+        exec_aten::TensorShapeDynamism::STATIC,
+        [](void* ptr) {
+          free(ptr);
+        }
+    ));
+  }
+}
diff --git a/backends/xnnpack/test/tester/__init__.py b/backends/xnnpack/test/tester/__init__.py
index a4527d9edc8..5d1dcfe84cd 100644
--- a/backends/xnnpack/test/tester/__init__.py
+++ b/backends/xnnpack/test/tester/__init__.py
@@ -6,6 +6,7 @@
 from executorch.backends.xnnpack.test.tester.tester import (
     Export,
+    ToEdge,
     Partition,
     Quantize,
     RunPasses,
@@ -18,12 +19,12 @@
 
 __all__ = [
     "Export",
-    "ToEdge",
     "Partition",
     "Quantize",
     "RunPasses",
-    "ToEdgeTransformAndLower",
-    "Tester",
     "Serialize",
+    "Tester",
+    "ToEdge",
+    "ToEdgeTransformAndLower",
     "ToExecutorch",
 ]
diff --git a/pytest.ini b/pytest.ini
index 557a307bdf2..de7d932b946 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -39,6 +39,11 @@ addopts =
     # but maybe it is a bit of anti-pattern
     --ignore=kernels/quantized/test/test_quant_dequant_per_token.py
     kernels/test/test_case_gen.py
+    # backends/test
+    # This effort is WIP and will be enabled in CI once testing infra
+    # is stable and the signal-to-noise ratio is good (no irrelevant failures).
+    # See https://github.com/pytorch/executorch/discussions/11140
+    --ignore=backends/test
     # backends/xnnpack
     backends/xnnpack/test/ops
     --ignore=backends/xnnpack/test/ops/test_bmm.py