From 092e032b24d1c1b5de077444cc61ef70c10dd0ed Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Tue, 4 Feb 2025 09:34:13 -0800 Subject: [PATCH] Error out if registering prim ops multiple times (#8172) Summary: Using `executorch_ops_check` should error out if the dependencies includes 2 libraries of `prim_op_registry`. For example: The target `//arvr/libraries/wristband/tsn/transformers:TorchstreamTransformerTest` depends on both `prim_ops_registry` and `prim_ops_registry_aten`. BUCK query: ``` buck2 uquery "filter('prim_ops_registry(?:_static|_aten)?$', deps(//arvr/libraries/wristband/tsn/transformers: TorchstreamTransformerTest))" Buck UI: https://www.internalfb.com/buck2/1401c12c-0ef2-4ba8-8d4e-6b86871d708f Network: Up: 0B Down: 0B Command: uquery. Time elapsed: 3.6s fbcode//executorch/kernels/prim_ops:prim_ops_registry fbcode//executorch/kernels/prim_ops:prim_ops_registry_aten fbsource//xplat/executorch/kernels/prim_ops:prim_ops_registry fbsource//xplat/executorch/kernels/prim_ops:prim_ops_registry_aten ``` This will cause the error like this: ``` E 00:00:00.032942 executorch:operator_registry.cpp:86] Re-registering aten::sym_size.int, from /data/users/larryliu/fbsource/buck-out/v2/gen/fbsource/cfdc20bd56300721/arvr/libraries/wristband/tsn/transformers/__TorchstreamTransformerTest__/./__TorchstreamTransformerTest__shared_libs_symlink_tree/libexecutorc$ E 00:00:00.033022 executorch:operator_registry.cpp:87] key: (null), is_fallback: true F 00:00:00.033033 executorch:operator_registry.cpp:111] In function register_kernels(), assert failed (false): Kernel registration failed with error 18, see error log for details. ``` To catch issues like this, we should error out when user uses `executorch_ops_check` to debug. ``` executorch_ops_check( name = "my_executorch_test_check", deps = [":TorchstreamTransformerTest"], ) ``` Fixing the internal portion of [#8171](https://github.com/pytorch/executorch/issues/8171) Reviewed By: lucylq Differential Revision: D69090850 --- codegen/tools/gen_all_oplist.py | 44 +++++++++++++++++++++-- shim/xplat/executorch/codegen/codegen.bzl | 1 + 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/codegen/tools/gen_all_oplist.py b/codegen/tools/gen_all_oplist.py index b417444f8dc..5cb93bb9153 100644 --- a/codegen/tools/gen_all_oplist.py +++ b/codegen/tools/gen_all_oplist.py @@ -47,6 +47,31 @@ def resolve_model_file_path_to_buck_target(model_file_path: str) -> str: return real_path +def _raise_if_check_prim_ops_fail(options): + + # Error out if we have more than one targets registering prim ops. + if options.DEBUG_ONLY_check_prim_ops and len(options.DEBUG_ONLY_check_prim_ops) > 1: + assert ( + options.DEBUG_ONLY_check_prim_ops[0] == "@" + ), "DEBUG_ONLY_check_prim_ops is not a valid file path, or it doesn't start with '@'. This is likely a BUCK issue." + + prim_ops_targets_file = options.DEBUG_ONLY_check_prim_ops[1:] + with open(prim_ops_targets_file, "r") as file: + prim_ops_targets = file.read().split() + if len(prim_ops_targets) > 1: + # Yellow bold: \033[33;1m + # Red bold: \033[31;1m + # Green bold: \033[32;1m + error = ( + "It seems this target is depending on more than 1 `prim_ops_registry` targets: " + + f'\033[33;1m\n{", ".join(prim_ops_targets)}\033[0m. \nThis will likely cause errors such as: ' + + "\n \033[31;1mRe-registering aten::sym_size.int...\033[0m" + + "\nTo find out the dependency chain, run the following command: " + + f'\n \033[32;1mbuck2 cquery "allpaths(, {prim_ops_targets[0]})"\033[0m' + ) + raise Exception(error) + + def main(argv: List[Any]) -> None: """This binary generates 3 files: @@ -95,8 +120,18 @@ def main(argv: List[Any]) -> None: default=False, required=False, ) + parser.add_argument( + "--DEBUG-ONLY-check-prim-ops", + "--DEBUG_ONLY_check_prim_ops", + help=( + "Useful argument to take BUCK targets that registers prim ops and error out if we have more than 1." + ), + required=False, + ) options = parser.parse_args(argv) + _raise_if_check_prim_ops_fail(options) + # Check if the build has any dependency on any selective build target. If we have a target, BUCK shold give us either: # 1. a yaml file containing selected ops (could be empty), or # 2. a non-empty list of yaml files in the `model_file_list_path` or @@ -153,14 +188,17 @@ def main(argv: List[Any]) -> None: debug_info_2 = ",".join( model_dict["operators"][op_name]["debug_info"] ) - error = f"Operator {op_name} is used in 2 models: {debug_info_1} and {debug_info_2}" + # Yellow bold: \033[33;1m + # Red bold: \033[31;1m + # Green bold: \033[32;1m + error = f"\033[31;1mOperator {op_name} is used in 2 models: \033[33;1m{debug_info_1} and {debug_info_2}\033[0m" if "//" not in debug_info_1 and "//" not in debug_info_2: error += "\nWe can't determine what BUCK targets these model files belong to." tail = "." else: error += "\nPlease run the following commands to find out where is the BUCK target being added as a dependency to your target:\n" - error += f'\n buck2 cquery "allpaths(, {debug_info_1})"' - error += f'\n buck2 cquery "allpaths(, {debug_info_2})"' + error += f'\n \033[32;1mbuck2 cquery "allpaths(, {debug_info_1})"\033[0m' + error += f'\n \033[32;1mbuck2 cquery "allpaths(, {debug_info_2})"\033[0m' tail = "as well as results from BUCK commands listed above." error += ( diff --git a/shim/xplat/executorch/codegen/codegen.bzl b/shim/xplat/executorch/codegen/codegen.bzl index 8e0e89eda57..4b69a2cf4a0 100644 --- a/shim/xplat/executorch/codegen/codegen.bzl +++ b/shim/xplat/executorch/codegen/codegen.bzl @@ -692,6 +692,7 @@ def executorch_ops_check( "--model_file_list_path $(@query_outputs \"filter('.*_et_oplist', deps(set({deps})))\") " + "--allow_include_all_overloads " + "--check_ops_not_overlapping " + + "--DEBUG_ONLY_check_prim_ops $(@query_targets \"filter('prim_ops_registry(?:_static|_aten)?$', deps(set({deps})))\") " + "--output_dir $OUT ").format(deps = " ".join(["\'{}\'".format(d) for d in deps])), define_static_target = False, platforms = kwargs.pop("platforms", get_default_executorch_platforms()),