Skip to content

Commit

Permalink
Rationalize inlining of kernels into the unboxing wrapper (#42845)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #42845

- In server builds, always allow the compiler to inline the kernel into the unboxing wrapper, i.e. optimize for perf.
- In mobile builds, never inline the kernel into the unboxing wrapper, i.e. optimize for binary size.

Note that this only applies to registration API calls where inlining is actually possible, i.e. calls that use `TORCH_FN` or some of the old API calls.
Registrations that pass a runtime function pointer to the registration API cannot be inlined, so they are not inlined in server builds either.

Note also that in server builds, all we do is **allow** the compiler to inline. We don't force inlining.

ghstack-source-id: 114177591

Test Plan:
waitforsandcastle

https://www.internalfb.com/intern/fblearner/details/225217260/

Reviewed By: ezyang

Differential Revision: D23045772

fbshipit-source-id: f74fd600eaa3f5cfdf0da47ea080801a03db7917
  • Loading branch information
smessmer authored and facebook-github-bot committed Oct 15, 2020
1 parent 053c252 commit 2d6fd22
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 4 deletions.
18 changes: 16 additions & 2 deletions aten/src/ATen/core/boxing/KernelFunction_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,27 +127,41 @@ inline KernelFunction KernelFunction::makeFromUnboxedOnlyFunctor(std::unique_ptr
}

// Creates a KernelFunction from a compile-time function pointer (the static_assert
// messages below say it must have been created with TORCH_FN).
//
// Template parameters:
//   FuncPtr          - compile-time function pointer type; must satisfy
//                      is_compile_time_function_pointer, must not wrap a
//                      BoxedKernelFunction, and must not be nullptr.
//   AllowLegacyTypes - forwarded to makeFromUnboxedFunctor on the non-mobile path.
//
// Behavior depends on the build:
//   - C10_MOBILE not defined: the function pointer is wrapped into a functor type
//     (impl::WrapFunctionIntoFunctor), so the kernel is known at compile time.
//   - C10_MOBILE defined: the raw pointer is passed to
//     makeFromUnboxedRuntimeFunction instead.
//
// NOTE(review): the two consecutive signature lines below appear to be the removed
// and added sides of a diff rendering; only the second one (with the named
// `func_ptr` parameter) matches the body, which uses `func_ptr` on the mobile path.
template<class FuncPtr, bool AllowLegacyTypes>
inline KernelFunction KernelFunction::makeFromUnboxedFunction(FuncPtr) {
inline KernelFunction KernelFunction::makeFromUnboxedFunction(FuncPtr func_ptr) {
// Compile-time validation of FuncPtr: right wrapper kind, not a boxed kernel, non-null.
static_assert(is_compile_time_function_pointer<FuncPtr>::value, "Tried to call KernelFunction::makeFromUnboxedFunction with an invalid parameter. It must be a function pointer created with TORCH_FN.");
static_assert(!std::is_same<typename FuncPtr::FuncType, BoxedKernelFunction>::value, "Tried to call KernelFunction::makeFromUnboxedFunction with a boxed function pointer. Please use KernelFunction::makeFromBoxedFunction instead.");
static_assert(FuncPtr::func_ptr() != nullptr, "Kernel function cannot be nullptr");

#if !defined(C10_MOBILE)
// Non-mobile builds: encode the function pointer in the functor's type. This is
// meant to allow (not force) the compiler to inline the kernel into the unboxing
// wrapper.
return makeFromUnboxedFunctor<AllowLegacyTypes, typename impl::WrapFunctionIntoFunctor<FuncPtr>::type>(
guts::make_unique_base<OperatorKernel, typename impl::WrapFunctionIntoFunctor<FuncPtr>::type>()
);
#else
// On mobile, we would rather optimize for binary size than for performance,
// so let's not inline the kernel into the wrapper but use makeFromUnboxedRuntimeFunction
// instead.
// NOTE(review): AllowLegacyTypes is not passed explicitly on this path, so whatever
// default makeFromUnboxedRuntimeFunction declares is used - confirm this is intended.
return makeFromUnboxedRuntimeFunction(func_ptr.func_ptr());
#endif
}

// Creates a KernelFunction from a compile-time function pointer (the static_assert
// messages below say it must have been created with TORCH_FN), going through the
// "unboxed only" factory functions.
//
// Template parameters:
//   FuncPtr - compile-time function pointer type; must satisfy
//             is_compile_time_function_pointer, must not wrap a
//             BoxedKernelFunction, and must not be nullptr.
//
// Behavior depends on the build:
//   - C10_MOBILE not defined: the function pointer is wrapped into a functor type
//     (impl::WrapFunctionIntoFunctor) and passed to makeFromUnboxedOnlyFunctor.
//   - C10_MOBILE defined: the raw pointer is passed to
//     makeFromUnboxedOnlyRuntimeFunction instead.
//
// NOTE(review): the two consecutive signature lines below appear to be the removed
// and added sides of a diff rendering; only the second one (with the named
// `func_ptr` parameter) matches the body, which uses `func_ptr` on the mobile path.
template<class FuncPtr>
inline KernelFunction KernelFunction::makeFromUnboxedOnlyFunction(FuncPtr) {
inline KernelFunction KernelFunction::makeFromUnboxedOnlyFunction(FuncPtr func_ptr) {
// TODO We want to get rid of kernels that have only an unboxed function pointer.
// All kernels should have a boxed pointer.
// Compile-time validation of FuncPtr: right wrapper kind, not a boxed kernel, non-null.
static_assert(is_compile_time_function_pointer<FuncPtr>::value, "Tried to call KernelFunction::makeFromUnboxedOnlyFunction with an invalid parameter. It must be a function pointer created with TORCH_FN.");
static_assert(!std::is_same<typename FuncPtr::FuncType, BoxedKernelFunction>::value, "Tried to call KernelFunction::makeFromUnboxedOnlyFunction with a boxed function pointer. Please use KernelFunction::makeFromBoxedFunction instead.");
static_assert(FuncPtr::func_ptr() != nullptr, "Kernel function cannot be nullptr");

#if !defined(C10_MOBILE)
// Non-mobile builds: encode the function pointer in the functor's type. This is
// meant to allow (not force) the compiler to inline the kernel into the unboxing
// wrapper.
return makeFromUnboxedOnlyFunctor<typename impl::WrapFunctionIntoFunctor<FuncPtr>::type> (
guts::make_unique_base<OperatorKernel, typename impl::WrapFunctionIntoFunctor<FuncPtr>::type>()
);
#else
// On mobile, we would rather optimize for binary size than for performance,
// so let's not inline the kernel into the wrapper but use makeFromUnboxedOnlyRuntimeFunction
// instead.
return makeFromUnboxedOnlyRuntimeFunction(func_ptr.func_ptr());
#endif
}

template<bool AllowLegacyTypes, class FuncType>
Expand Down
2 changes: 1 addition & 1 deletion aten/src/ATen/core/op_registration/op_registration.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ class CAFFE2_API RegisterOperators final {

return std::move(*this).kernel(
std::move(dispatch_key),
KernelFunction::makeFromUnboxedFunction(CompileTimeFunctionPointer<FuncType, kernel_func>()),
KernelFunction::makeFromUnboxedFunction(TORCH_FN(kernel_func)),
impl::CppSignature::make<FuncType>(),
// TODO Do schema inference without relying on WrapFunctionIntoFunctor
detail::inferFunctionSchemaFromFunctor<typename impl::WrapFunctionIntoFunctor<CompileTimeFunctionPointer<FuncType, kernel_func>>::type>()
Expand Down
2 changes: 1 addition & 1 deletion torch/library.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class CAFFE2_API CppFunction final {
/// This overload accepts compile time function pointers, e.g., `CppFunction(TORCH_FN(add_impl))`
template <typename FuncPtr>
explicit CppFunction(FuncPtr f, std::enable_if_t<c10::is_compile_time_function_pointer<FuncPtr>::value, std::nullptr_t> = nullptr)
: func_(c10::KernelFunction::makeFromUnboxedRuntimeFunction(f.func_ptr()))
: func_(c10::KernelFunction::makeFromUnboxedFunction(f))
, cpp_signature_(c10::impl::CppSignature::make<typename FuncPtr::FuncType>())
// TODO: Don't go through WrapRuntimeKernelFunctor
, schema_(c10::detail::inferFunctionSchemaFromFunctor<c10::impl::WrapFunctionIntoRuntimeFunctor<std::decay_t<typename FuncPtr::FuncType>>>())
Expand Down

0 comments on commit 2d6fd22

Please sign in to comment.