diff --git a/kernels/optimized/CMakeLists.txt b/kernels/optimized/CMakeLists.txt
index 01a10f77846..2c340b70b7d 100644
--- a/kernels/optimized/CMakeLists.txt
+++ b/kernels/optimized/CMakeLists.txt
@@ -24,6 +24,7 @@ endif()
 set(_common_compile_options
   $<$<CXX_COMPILER_ID:MSVC>:/wd4996>
   $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wno-deprecated-declarations>
+  $<$<CXX_COMPILER_ID:GNU>:-Wno-psabi>
 )
 
 # Note for apple platform we can rely on Accelerate framework Will come back to
diff --git a/kernels/optimized/cpu/op_elu.cpp b/kernels/optimized/cpu/op_elu.cpp
index 30f7ff442d8..c3ce156b31c 100644
--- a/kernels/optimized/cpu/op_elu.cpp
+++ b/kernels/optimized/cpu/op_elu.cpp
@@ -41,7 +41,7 @@ void elu(
       0,
       out.numel(),
       ::executorch::extension::internal::GRAIN_SIZE,
-      [&](const auto begin, const auto end) {
+      [&](const auto& begin, const auto& end) {
         using Vec = at::vec::Vectorized<CTYPE>;
         const auto vectorized_begin =
             begin + (Vec::size() - begin % Vec::size()) % Vec::size();
diff --git a/kernels/optimized/cpu/op_log_softmax.cpp b/kernels/optimized/cpu/op_log_softmax.cpp
index 629a81a6429..256a4079a58 100644
--- a/kernels/optimized/cpu/op_log_softmax.cpp
+++ b/kernels/optimized/cpu/op_log_softmax.cpp
@@ -55,7 +55,7 @@ void log_softmax_kernel(const Tensor& input, int64_t dim, Tensor& out) {
       0,
       outer_size,
       ::executorch::extension::internal::GRAIN_SIZE,
-      [&](const auto begin, const auto end) {
+      [&](const auto& begin, const auto& end) {
         at::native::serial_vec_log_softmax_lastdim_range(
             input_data_base,
             output_data_base,
diff --git a/kernels/portable/cpu/op_add.cpp b/kernels/portable/cpu/op_add.cpp
index 122b2a2c97e..c7782e4276b 100644
--- a/kernels/portable/cpu/op_add.cpp
+++ b/kernels/portable/cpu/op_add.cpp
@@ -80,7 +80,7 @@ Tensor& add_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::REALHBBF16>(
-          [val_alpha](const auto val_a, const auto val_b) {
+          [val_alpha](const auto& val_a, const auto& val_b) {
             return val_a + val_alpha * val_b;
           },
           ctx,
@@ -136,7 +136,7 @@ Tensor& add_scalar_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::SAME_AS_COMMON>(
-          [val_alpha_times_b](const auto val_a) {
+          [val_alpha_times_b](const auto& val_a) {
             // Cast here supports vectorization; either it does nothing
             // or it casts from CTYPE_COMPUTE to
             // Vectorized<CTYPE_COMPUTE>.
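The recurring edit above (and throughout the portable kernels below) takes the generic lambda parameters by `const auto&` instead of by value. These lambdas are instantiated both with scalars and with `at::vec::Vectorized<T>` values, and passing a wide vector type by value across a function boundary is what GCC's -Wpsabi note (silenced in the CMake change) warns about on some targets. A minimal self-contained sketch of the pattern, using a hypothetical `WideVec` stand-in rather than the real ATen type:

```cpp
#include <array>
#include <cstddef>

// Stand-in for a wide SIMD value type such as at::vec::Vectorized<float>.
// GCC may emit a -Wpsabi note when such a type is passed by value on
// targets whose vector-argument ABI has changed across GCC versions.
struct WideVec {
  std::array<float, 16> lanes{};
};

WideVec operator+(const WideVec& a, const WideVec& b) {
  WideVec out;
  for (std::size_t i = 0; i < out.lanes.size(); ++i) {
    out.lanes[i] = a.lanes[i] + b.lanes[i];
  }
  return out;
}

int main() {
  // One generic lambda, two instantiations: `const auto&` binds to the
  // scalar and the wide type alike, without copying WideVec per call.
  auto add = [](const auto& val_a, const auto& val_b) { return val_a + val_b; };
  float scalar = add(1.0f, 2.0f);          // scalar path
  WideVec vec = add(WideVec{}, WideVec{}); // "vectorized" path
  (void)scalar;
  (void)vec;
  return 0;
}
```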
diff --git a/kernels/portable/cpu/op_addmm.cpp b/kernels/portable/cpu/op_addmm.cpp
index 440a8b2c0fa..3e53100d356 100644
--- a/kernels/portable/cpu/op_addmm.cpp
+++ b/kernels/portable/cpu/op_addmm.cpp
@@ -92,7 +92,7 @@ Tensor& addmm_out(
           CTYPE,
           op_name,
           utils::SupportedTensorDtypes::REALHBF16>(
-          [alpha_val, beta_val](const auto val_a, const auto val_b) {
+          [alpha_val, beta_val](const auto& val_a, const auto& val_b) {
             return val_a * alpha_val + val_b * beta_val;
           },
           ctx,
diff --git a/kernels/portable/cpu/op_atan2.cpp b/kernels/portable/cpu/op_atan2.cpp
index 5390eb52820..d5ec2516bd4 100644
--- a/kernels/portable/cpu/op_atan2.cpp
+++ b/kernels/portable/cpu/op_atan2.cpp
@@ -59,7 +59,7 @@ Tensor& atan2_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::FLOATHBF16>(
-          [](const auto val_a, const auto val_b) {
+          [](const auto& val_a, const auto& val_b) {
             return executorch::math::atan2(val_a, val_b);
           },
           ctx,
diff --git a/kernels/portable/cpu/op_clamp.cpp b/kernels/portable/cpu/op_clamp.cpp
index 8ac78fd5477..72134ae9ff8 100644
--- a/kernels/portable/cpu/op_clamp.cpp
+++ b/kernels/portable/cpu/op_clamp.cpp
@@ -139,7 +139,7 @@ Tensor& clamp_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::SAME_AS_COMMON>(
-          [has_min, min_opt, has_max, max_opt](const auto val_in) {
+          [has_min, min_opt, has_max, max_opt](const auto& val_in) {
             auto val_out = val_in;
             if (has_min) {
               val_out = utils::max_override(
diff --git a/kernels/portable/cpu/op_div.cpp b/kernels/portable/cpu/op_div.cpp
index 51a65747b33..f94f219d853 100644
--- a/kernels/portable/cpu/op_div.cpp
+++ b/kernels/portable/cpu/op_div.cpp
@@ -62,7 +62,7 @@ Tensor& div_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::FLOATHBF16>(
-          [](const auto val_a, const auto val_b) { return val_a / val_b; },
+          [](const auto& val_a, const auto& val_b) { return val_a / val_b; },
           ctx,
           a,
           utils::SupportedTensorDtypes::REALHBBF16,
@@ -195,7 +195,7 @@ Tensor& div_scalar_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::SAME_AS_COMMON>(
-          [val_b](const auto val_a) { return val_a / val_b; },
+          [val_b](const auto& val_a) { return val_a / val_b; },
           ctx,
           a,
           utils::SupportedTensorDtypes::REALHBBF16,
diff --git a/kernels/portable/cpu/op_fmod.cpp b/kernels/portable/cpu/op_fmod.cpp
index 40bb4a5e94c..05bb4f9e553 100644
--- a/kernels/portable/cpu/op_fmod.cpp
+++ b/kernels/portable/cpu/op_fmod.cpp
@@ -138,7 +138,7 @@ Tensor& fmod_Scalar_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::REALHBF16>(
-          [val_b](const auto val_a) {
+          [val_b](const auto& val_a) {
             return executorch::math::fmod(val_a, (decltype(val_a))val_b);
           },
           ctx,
diff --git a/kernels/portable/cpu/op_isinf.cpp b/kernels/portable/cpu/op_isinf.cpp
index ac0c19f0f7a..d6fab50ec29 100644
--- a/kernels/portable/cpu/op_isinf.cpp
+++ b/kernels/portable/cpu/op_isinf.cpp
@@ -14,7 +14,18 @@
 namespace torch {
 namespace executor {
 namespace native {
-DEFINE_UNARY_UFUNC_REALHBBF16_TO_BOOL(isinf_out, std::isinf)
+bool isinf_float(float x) {
+  return std::isinf(x);
+}
+
+bool isinf_double(double x) {
+  return std::isinf(x);
+}
+
+Tensor& isinf_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
+  return internal::unary_ufunc_realhbbf16_to_bool(
+      isinf_float, isinf_double, ctx, in, out);
+}
 
 } // namespace native
 } // namespace executor
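Both isinf and isnan (next file) move from the `DEFINE_UNARY_UFUNC_REALHBBF16_TO_BOOL` macro to plain named functions forwarded to `internal::unary_ufunc_realhbbf16_to_bool`. One plausible motivation (an assumption, not stated in the diff): `std::isinf` and `std::isnan` are overload sets, so wrapping them in functions with fixed `float`/`double` signatures gives the helper unambiguous function pointers. A simplified sketch of the helper's shape, using raw arrays in place of Tensors (hypothetical signature, not the ExecuTorch API):

```cpp
#include <cmath>
#include <cstddef>

// Fixed-signature wrappers: unlike the std:: overload sets, these can
// be passed around as plain function pointers.
bool isinf_float(float x) {
  return std::isinf(x);
}
bool isinf_double(double x) {
  return std::isinf(x);
}

// Simplified stand-in for the elementwise bool-producing helper: apply
// the matching predicate to each input element (float path shown; the
// real helper dispatches on the input dtype).
void unary_ufunc_to_bool(
    bool (*fn_float)(float),
    const float* in,
    bool* out,
    std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    out[i] = fn_float(in[i]);
  }
}

int main() {
  const float in[3] = {1.0f, HUGE_VALF, -HUGE_VALF};
  bool out[3] = {};
  unary_ufunc_to_bool(isinf_float, in, out, 3);
  return (!out[0] && out[1] && out[2]) ? 0 : 1;
}
```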
diff --git a/kernels/portable/cpu/op_isnan.cpp b/kernels/portable/cpu/op_isnan.cpp
index dad38a2619a..bd3aaf7806a 100644
--- a/kernels/portable/cpu/op_isnan.cpp
+++ b/kernels/portable/cpu/op_isnan.cpp
@@ -13,8 +13,18 @@
 namespace torch {
 namespace executor {
 namespace native {
 
+bool isnan_float(float x) {
+  return std::isnan(x);
+}
-DEFINE_UNARY_UFUNC_REALHBBF16_TO_BOOL(isnan_out, std::isnan)
+bool isnan_double(double x) {
+  return std::isnan(x);
+}
+
+Tensor& isnan_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
+  return internal::unary_ufunc_realhbbf16_to_bool(
+      isnan_float, isnan_double, ctx, in, out);
+}
 
 } // namespace native
 } // namespace executor
diff --git a/kernels/portable/cpu/op_maximum.cpp b/kernels/portable/cpu/op_maximum.cpp
index c7979e40d7c..3da154ede82 100644
--- a/kernels/portable/cpu/op_maximum.cpp
+++ b/kernels/portable/cpu/op_maximum.cpp
@@ -49,7 +49,7 @@ Tensor& maximum_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::REALHBBF16>(
-          [](const auto val_a, const auto val_b) {
+          [](const auto& val_a, const auto& val_b) {
             return utils::max_override(val_a, val_b);
           },
           ctx,
diff --git a/kernels/portable/cpu/op_mul.cpp b/kernels/portable/cpu/op_mul.cpp
index 6d4f30106ca..58172c249d4 100644
--- a/kernels/portable/cpu/op_mul.cpp
+++ b/kernels/portable/cpu/op_mul.cpp
@@ -72,7 +72,7 @@ Tensor& mul_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::REALHBBF16>(
-          [](const auto val_a, const auto val_b) { return val_a * val_b; },
+          [](const auto& val_a, const auto& val_b) { return val_a * val_b; },
           ctx,
           a,
           utils::SupportedTensorDtypes::REALHBBF16,
@@ -116,7 +116,7 @@ Tensor& mul_scalar_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::SAME_AS_COMMON>(
-          [val_b](const auto val_a) { return val_a * val_b; },
+          [val_b](const auto& val_a) { return val_a * val_b; },
           ctx,
           a,
           utils::SupportedTensorDtypes::REALHBBF16,
diff --git a/kernels/portable/cpu/op_neg.cpp b/kernels/portable/cpu/op_neg.cpp
index d184eb873d5..4d7a9284e4c 100644
--- a/kernels/portable/cpu/op_neg.cpp
+++ b/kernels/portable/cpu/op_neg.cpp
@@ -39,7 +39,7 @@ Tensor& neg_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
       CTYPE,
       op_name,
       utils::SupportedTensorDtypes::SAME_AS_COMMON>(
-      [](const auto val_in) { return -val_in; },
+      [](const auto& val_in) { return -val_in; },
       ctx,
       in,
       utils::SupportedTensorDtypes::REALHBF16,
diff --git a/kernels/portable/cpu/op_pow.cpp b/kernels/portable/cpu/op_pow.cpp
index aaf934b9adf..31085165dde 100644
--- a/kernels/portable/cpu/op_pow.cpp
+++ b/kernels/portable/cpu/op_pow.cpp
@@ -57,7 +57,7 @@ Tensor& pow_Tensor_Tensor_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::REALHBF16>(
-          [](const auto val_a, const auto val_b) {
+          [](const auto& val_a, const auto& val_b) {
             return executorch::math::pow(val_a, val_b);
           },
           ctx,
diff --git a/kernels/portable/cpu/op_rsub.cpp b/kernels/portable/cpu/op_rsub.cpp
index 6a0a77b6596..17faed95c52 100644
--- a/kernels/portable/cpu/op_rsub.cpp
+++ b/kernels/portable/cpu/op_rsub.cpp
@@ -56,7 +56,7 @@ Tensor& rsub_scalar_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::SAME_AS_COMMON>(
-          [val_b, val_alpha](const auto val_a) {
+          [val_b, val_alpha](const auto& val_a) {
             return val_b - val_alpha * val_a;
           },
           ctx,
diff --git a/kernels/portable/cpu/op_sigmoid.cpp b/kernels/portable/cpu/op_sigmoid.cpp
index 0578c846ab7..08c85e8fd01 100644
--- a/kernels/portable/cpu/op_sigmoid.cpp
+++ b/kernels/portable/cpu/op_sigmoid.cpp
@@ -47,7 +47,7 @@ Tensor& sigmoid_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
       CTYPE_COMPUTE,
       op_name,
       utils::SupportedTensorDtypes::FLOATHBF16>(
-      [](const auto val_in) {
+      [](const auto& val_in) {
         const auto one = static_cast<decltype(val_in)>(1.0);
         auto out_val = one / (one + executorch::math::exp(-val_in));
         return out_val;
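The sigmoid body above derives its constant's type from the argument so the same lambda works for scalar and `Vectorized` instantiations alike. A scalar-only sketch of that pattern (it uses `std::decay_t` where the kernel casts to `decltype(val_in)`, since with a `const auto&` parameter `decltype(val_in)` carries a const reference):

```cpp
#include <cmath>
#include <cstdio>
#include <type_traits>

int main() {
  // Scalar-only sketch of the op_sigmoid lambda. std::decay_t strips
  // the const reference that `const auto&` adds to decltype(val_in),
  // leaving the plain value type for the literal.
  auto sigmoid = [](const auto& val_in) {
    using T = std::decay_t<decltype(val_in)>;
    const auto one = static_cast<T>(1.0);
    return one / (one + std::exp(-val_in));
  };
  std::printf("%f\n", sigmoid(0.0));  // 0.500000
  std::printf("%f\n", sigmoid(2.0f)); // ~0.880797
  return 0;
}
```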
diff --git a/kernels/portable/cpu/op_sub.cpp b/kernels/portable/cpu/op_sub.cpp
index b914c411303..32322aa90cd 100644
--- a/kernels/portable/cpu/op_sub.cpp
+++ b/kernels/portable/cpu/op_sub.cpp
@@ -60,7 +60,7 @@ Tensor& sub_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::REALHBF16>(
-          [val_alpha](const auto val_a, const auto val_b) {
+          [val_alpha](const auto& val_a, const auto& val_b) {
             return val_a - (decltype(val_b))(val_alpha)*val_b;
           },
           ctx,
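In sub_out the captured alpha is converted to the operand's type before the multiply, so that when `val_b` is a `Vectorized` value the arithmetic stays in the vector domain instead of falling back to scalar. A scalar-only sketch (using an explicit decay in place of the kernel's C-style cast, for the same `const auto&` reason as above):

```cpp
#include <cstdio>
#include <type_traits>

int main() {
  const double val_alpha = 2.0;
  // Scalar-only sketch of the op_sub lambda: convert alpha to the
  // (decayed) type of val_b so the multiply happens in val_b's type
  // rather than promoting everything to double.
  auto sub = [val_alpha](const auto& val_a, const auto& val_b) {
    using T = std::decay_t<decltype(val_b)>;
    return val_a - static_cast<T>(val_alpha) * val_b;
  };
  std::printf("%f\n", sub(10.0f, 3.0f)); // 10 - 2*3 = 4.000000
  return 0;
}
```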