diff --git a/extension/llm/custom_ops/targets.bzl b/extension/llm/custom_ops/targets.bzl
index c2843f5c2f7..6b9f9cb959c 100644
--- a/extension/llm/custom_ops/targets.bzl
+++ b/extension/llm/custom_ops/targets.bzl
@@ -1,4 +1,9 @@
 load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+load(
+    "@fbsource//xplat/executorch/kernels/portable:op_registration_util.bzl",
+    "get_compiler_optimization_flags",
+)
+
 
 def define_common_targets():
     """Defines targets that should be shared between fbcode and xplat.
@@ -34,7 +39,7 @@ def define_common_targets():
                 "//executorch/kernels/portable/cpu/util:reduce_util",
                 "//executorch/extension/llm/custom_ops/spinquant:fast_hadamard_transform",
             ],
-            compiler_flags = ["-Wno-missing-prototypes", "-Wno-global-constructors"],
+            compiler_flags = ["-Wno-missing-prototypes", "-Wno-global-constructors"] + get_compiler_optimization_flags(),
             visibility = [
                 "//executorch/...",
                 "//executorch/extension/llm/custom_ops/...",
diff --git a/kernels/optimized/lib_defs.bzl b/kernels/optimized/lib_defs.bzl
index d9721e5055d..c3799f7db51 100644
--- a/kernels/optimized/lib_defs.bzl
+++ b/kernels/optimized/lib_defs.bzl
@@ -2,6 +2,10 @@ load("@fbsource//tools/build_defs:default_platform_defs.bzl", "DEVSERVER_PLATFOR
 load("@fbsource//tools/build_defs:fb_native_wrapper.bzl", "fb_native")
 load("@fbsource//xplat/executorch/backends/xnnpack/third-party:third_party_libs.bzl", "third_party_dep")
 load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+load(
+    "@fbsource//xplat/executorch/kernels/portable:op_registration_util.bzl",
+    "get_compiler_optimization_flags",
+)
 
 # Because vec exists as a collection of header files, compile and preprocessor
 # flags applied to the vec target do not have any effect, since no compilation
@@ -121,6 +125,7 @@ def define_libs():
             exported_headers = native.glob([
                 "blas/**/*.h",
             ]),
+            compiler_flags = get_compiler_optimization_flags(),
             header_namespace = "executorch/kernels/optimized",
             visibility = [
                 "//executorch/...",
diff --git a/kernels/optimized/op_registration_util.bzl b/kernels/optimized/op_registration_util.bzl
index c969aa81a9a..6e74836bb79 100644
--- a/kernels/optimized/op_registration_util.bzl
+++ b/kernels/optimized/op_registration_util.bzl
@@ -4,6 +4,10 @@ load(
     "@fbsource//xplat/executorch/kernels/optimized:lib_defs.bzl",
     "get_vec_android_preprocessor_flags",
 )
+load(
+    "@fbsource//xplat/executorch/kernels/portable:op_registration_util.bzl",
+    "get_compiler_optimization_flags",
+)
 
 def op_target(name, deps = []):
     """Registers an optimized implementation for an operator overload group.
@@ -87,7 +91,7 @@ def define_op_library(name, deps):
         ],
         # kernels often have helpers with no prototypes just disabling the warning here as the headers
         # are codegend and linked in later
-        compiler_flags = ["-Wno-missing-prototypes"],
+        compiler_flags = ["-Wno-missing-prototypes"] + get_compiler_optimization_flags(),
         deps = [
             "//executorch/runtime/kernel:kernel_includes",
         ] + augmented_deps,
diff --git a/shim/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim/xplat/executorch/kernels/portable/op_registration_util.bzl
index ef170d62970..6a25f35c304 100644
--- a/shim/xplat/executorch/kernels/portable/op_registration_util.bzl
+++ b/shim/xplat/executorch/kernels/portable/op_registration_util.bzl
@@ -1,6 +1,24 @@
 load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "is_xplat", "runtime")
 load("@fbsource//xplat/executorch/build:selects.bzl", "selects")
 
+def get_compiler_optimization_flags():
+    # various ovr_configs are not available in oss
+    if not runtime.is_oss:
+        compiler_flags = select({
+            "DEFAULT": [],
+            "ovr_config//os:android-arm64": [
+                "-O2",
+            ],
+            "ovr_config//os:iphoneos": [
+                "-O2",
+            ],
+            "ovr_config//os:macos-arm64": [
+                "-O2",
+            ],
+        })
+        return compiler_flags
+    return []
+
 def op_target(name, deps = [], android_deps = [], _allow_third_party_deps = False, _aten_mode_deps = []):
     """Registers an implementation of an operator overload group.
 
@@ -132,7 +150,7 @@ def define_op_library(name, deps, android_deps, aten_target, _allow_third_party_
             # library, and it blocks users like unit tests to use kernel
             # implementation directly. So we enable this for xplat only.
             ["-fvisibility=hidden"] if is_xplat() else []
-        ),
+        ) + get_compiler_optimization_flags(),
         deps = [
             "//executorch/runtime/kernel:kernel_includes" + aten_suffix,
         ] + deps,