From 762f0aac3e35c1fcba49c448c380c7ee660feb4d Mon Sep 17 00:00:00 2001
From: Kimish Patel
Date: Mon, 28 Oct 2024 08:47:56 -0700
Subject: [PATCH] [Executorch] optimized sigmoid

Basically, use the Sleef exp approximation instead of std::exp.

Differential Revision: [D64156864](https://our.internmc.facebook.com/intern/diff/D64156864/)

[ghstack-poisoned]
---
 kernels/optimized/cpu/op_sigmoid.cpp | 102 +++++++++++++++++++++++++++
 kernels/optimized/cpu/targets.bzl    |   1 +
 kernels/optimized/optimized-oss.yaml |   5 ++
 kernels/optimized/optimized.yaml     |   5 ++
 kernels/test/targets.bzl             |   2 +-
 5 files changed, 114 insertions(+), 1 deletion(-)
 create mode 100644 kernels/optimized/cpu/op_sigmoid.cpp

diff --git a/kernels/optimized/cpu/op_sigmoid.cpp b/kernels/optimized/cpu/op_sigmoid.cpp
new file mode 100644
index 00000000000..13f8ebb8413
--- /dev/null
+++ b/kernels/optimized/cpu/op_sigmoid.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <cmath>
+
+#include <executorch/kernels/optimized/vec/functional.h>
+#include <executorch/kernels/optimized/vec/vec.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+namespace torch {
+namespace executor {
+namespace native {
+
+namespace {
+template <
+    typename CTYPE_IN,
+    typename CTYPE_OUT,
+    typename std::enable_if<
+        std::is_same_v<CTYPE_IN, CTYPE_OUT> &&
+            !std::is_same_v<CTYPE_IN, exec_aten::Half> &&
+            !std::is_same_v<CTYPE_IN, exec_aten::BFloat16>,
+        int>::type = 0>
+void sigmoid_data(
+    const CTYPE_IN* in_data,
+    const size_t numel,
+    CTYPE_OUT* out_data) {
+  using Vec = executorch::vec::Vectorized<CTYPE_IN>;
+  executorch::vec::map<CTYPE_IN>(
+      [](Vec x) {
+        auto one_plus_exp = x.neg().exp() + Vec(1.0);
+        return one_plus_exp.reciprocal();
+      },
+      out_data,
+      in_data,
+      numel);
+}
+
+template <
+    typename CTYPE_IN,
+    typename CTYPE_OUT,
+    typename std::enable_if<
+        !std::is_same_v<CTYPE_IN, CTYPE_OUT> ||
+            std::is_same_v<CTYPE_IN, exec_aten::Half> ||
+            std::is_same_v<CTYPE_IN, exec_aten::BFloat16> ||
+            std::is_same_v<CTYPE_OUT, exec_aten::Half> ||
+            std::is_same_v<CTYPE_OUT, exec_aten::BFloat16>,
+        int>::type = 0>
+void sigmoid_data(
+    const CTYPE_IN* in_data,
+    const size_t numel,
+    CTYPE_OUT* out_data) {
+  for (size_t i = 0; i < numel; i++) {
+    CTYPE_OUT xi = static_cast<CTYPE_OUT>(in_data[i]);
+    out_data[i] = (1.0 / (1.0 + std::exp(-xi)));
+  }
+}
+
+} // namespace
+
+using Tensor = exec_aten::Tensor;
+
+Tensor&
+opt_sigmoid_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
+  (void)ctx;
+
+  ET_KERNEL_CHECK(
+      ctx, in.scalar_type() != ScalarType::Bool, InvalidArgument, out);
+  ET_KERNEL_CHECK(ctx, tensor_is_floating_type(out), InvalidArgument, out);
+
+  ET_KERNEL_CHECK(
+      ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out);
+
+  // Resize for dynamic shape
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      resize_tensor(out, in.sizes()) == Error::Ok,
+      InvalidArgument,
+      out,
+      "Failed to resize output tensor.");
+
+  ScalarType in_type = in.scalar_type();
+  ScalarType out_type = out.scalar_type();
+  ET_SWITCH_REALHB_TYPES(in_type, ctx, "sigmoid.out", CTYPE_IN, [&]() {
+    ET_SWITCH_FLOATH_TYPES(out_type, ctx, "sigmoid.out", CTYPE_OUT, [&]() {
+      sigmoid_data<CTYPE_IN, CTYPE_OUT>(
+          in.const_data_ptr<CTYPE_IN>(),
+          in.numel(),
+          out.mutable_data_ptr<CTYPE_OUT>());
+    });
+  });
+
+  return out;
+}
+
+} // namespace native
+} // namespace executor
+} // namespace torch
diff --git a/kernels/optimized/cpu/targets.bzl b/kernels/optimized/cpu/targets.bzl
index 77a270cc45d..5e5f6dd7b99 100644
--- a/kernels/optimized/cpu/targets.bzl
+++ b/kernels/optimized/cpu/targets.bzl
@@ -25,6 +25,7 @@ _OPTIMIZED_ATEN_OPS = (
         ],
     ),
     op_target(name = "op_exp"),
+    op_target(name = "op_sigmoid"),
     op_target(
         name = "op_gelu",
         deps = select({
diff --git a/kernels/optimized/optimized-oss.yaml b/kernels/optimized/optimized-oss.yaml
index 797744f3bd4..b90d9a5afb6 100644
--- a/kernels/optimized/optimized-oss.yaml
+++ b/kernels/optimized/optimized-oss.yaml
@@ -35,6 +35,11 @@
     - arg_meta: null
       kernel_name: torch::executor::opt_exp_out
 
+- op: sigmoid.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::opt_sigmoid_out
+
 - op: le.Scalar_out
   kernels:
     - arg_meta: null
diff --git a/kernels/optimized/optimized.yaml b/kernels/optimized/optimized.yaml
index 2421673f8a7..ea07126a3b9 100644
--- a/kernels/optimized/optimized.yaml
+++ b/kernels/optimized/optimized.yaml
@@ -37,6 +37,11 @@
     - arg_meta: null
       kernel_name: torch::executor::opt_exp_out
 
+- op: sigmoid.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::opt_sigmoid_out
+
 - op: gelu.out
   kernels:
     - arg_meta: null
diff --git a/kernels/test/targets.bzl b/kernels/test/targets.bzl
index 91b3ba89fde..997210df239 100644
--- a/kernels/test/targets.bzl
+++ b/kernels/test/targets.bzl
@@ -296,7 +296,7 @@ def define_common_targets():
     _common_op_test("op_scatter_add_test", ["aten", "portable"])
     _common_op_test("op_select_scatter_test", ["aten", "portable"])
     _common_op_test("op_select_copy_test", ["aten", "portable"])
-    _common_op_test("op_sigmoid_test", ["aten", "portable"])
+    _common_op_test("op_sigmoid_test", ["aten", "portable", "optimized"])
     _common_op_test("op_sign_test", ["aten", "portable"])
     _common_op_test("op_sin_test", ["aten", "portable"])
     _common_op_test("op_sinh_test", ["aten", "portable"])
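
For reference, the kernel evaluates sigmoid(x) = 1 / (1 + exp(-x)) as reciprocal(exp(-x) + 1), so exp is the only transcendental involved and a fast vector exp (Sleef-backed, via Vectorized in the patch) covers the whole op. Below is a minimal standalone sketch of the same formulation, using plain std::exp with no ExecuTorch or Sleef dependency; the name scalar_sigmoid is chosen here for illustration and is not part of the patch:

```cpp
#include <cmath>
#include <cstdio>

// Same algebraic form as the kernel's vectorized lambda:
// sigmoid(x) = reciprocal(exp(-x) + 1). Only exp is transcendental,
// so replacing std::exp with a vectorized exp speeds up the whole op.
float scalar_sigmoid(float x) {
  return 1.0f / (std::exp(-x) + 1.0f);
}

int main() {
  for (float x : {-4.0f, -1.0f, 0.0f, 1.0f, 4.0f}) {
    std::printf("sigmoid(%+.1f) = %.6f\n", x, scalar_sigmoid(x));
  }
  return 0;
}
```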
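The patch picks between the two sigmoid_data overloads with std::enable_if over std::is_same_v: matching full-width float in/out types take the Vectorized path, while Half, BFloat16, or mixed in/out types fall back to the scalar loop. A minimal sketch of that overload-selection pattern follows, with a stand-in type; Half here is a hypothetical placeholder struct, not ExecuTorch's exec_aten::Half:

```cpp
#include <cstdio>
#include <type_traits>

struct Half {};  // stand-in for a half-precision type without vector support

// Chosen when IN == OUT and neither is the half-precision stand-in.
template <
    typename IN,
    typename OUT,
    typename std::enable_if<
        std::is_same_v<IN, OUT> && !std::is_same_v<IN, Half>,
        int>::type = 0>
void apply(const char* tag) {
  std::printf("%s -> vectorized path\n", tag);
}

// Chosen for mixed types or when the half-precision stand-in is involved;
// the condition is the exact complement of the one above.
template <
    typename IN,
    typename OUT,
    typename std::enable_if<
        !std::is_same_v<IN, OUT> || std::is_same_v<IN, Half> ||
            std::is_same_v<OUT, Half>,
        int>::type = 0>
void apply(const char* tag) {
  std::printf("%s -> scalar fallback\n", tag);
}

int main() {
  apply<float, float>("float->float");    // vectorized
  apply<float, double>("float->double");  // scalar (mixed types)
  apply<Half, float>("half->float");      // scalar (half input)
  return 0;
}
```

Because the two enable_if conditions are complements, exactly one overload is viable for every instantiation, which is what lets both functions share the name sigmoid_data in the patch without ambiguity.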