diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 5b331e4444915..b024e8a68bd6e 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1718,16 +1718,6 @@ class SelectionDAG { /// the target's desired shift amount type. LLVM_ABI SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op); - /// Expands a node with multiple results to an FP or vector libcall. The - /// libcall is expected to take all the operands of the \p Node followed by - /// output pointers for each of the results. \p CallRetResNo can be optionally - /// set to indicate that one of the results comes from the libcall's return - /// value. - LLVM_ABI bool - expandMultipleResultFPLibCall(RTLIB::Libcall LC, SDNode *Node, - SmallVectorImpl &Results, - std::optional CallRetResNo = {}); - /// Expand the specified \c ISD::VAARG node as the Legalize pass would. LLVM_ABI SDValue expandVAArg(SDNode *Node); diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 4d5d1fc7dfadc..cec7d09f494d6 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -5757,6 +5757,16 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase { /// consisting of zext/sext, extract_subvector, mul and add operations. SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const; + /// Expands a node with multiple results to an FP or vector libcall. The + /// libcall is expected to take all the operands of the \p Node followed by + /// output pointers for each of the results. \p CallRetResNo can be optionally + /// set to indicate that one of the results comes from the libcall's return + /// value. 
+ bool expandMultipleResultFPLibCall( + SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, + SmallVectorImpl &Results, + std::optional CallRetResNo = {}) const; + /// Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC /// on the current target. A VP_SETCC will additionally be given a Mask /// and/or EVL not equal to SDValue(). diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td index ce7e836f66446..b40b7f199f9e5 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.td +++ b/llvm/include/llvm/IR/RuntimeLibcalls.td @@ -182,10 +182,63 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in { def MODF_#FPTy : RuntimeLibcall; } -foreach VecTy = ["V4F32", "V2F64", "NXV4F32", "NXV2F64"] in { - def MODF_#VecTy : RuntimeLibcall; - def SINCOS_#VecTy : RuntimeLibcall; - def SINCOSPI_#VecTy : RuntimeLibcall; +defvar F32VectorSuffixes = ["V2F32", "V4F32", "V8F32", "V16F32", "NXV4F32"]; +defvar F64VectorSuffixes = ["V2F64", "V4F64", "V8F64", "NXV2F64"]; + +foreach S = !listconcat(F32VectorSuffixes, F64VectorSuffixes) in { + def ACOS_#S : RuntimeLibcall; + def ACOSH_#S : RuntimeLibcall; + def ASIN_#S : RuntimeLibcall; + def ASINH_#S : RuntimeLibcall; + def ATAN_#S : RuntimeLibcall; + def ATAN2_#S : RuntimeLibcall; + def ATANH_#S : RuntimeLibcall; + def CBRT_#S : RuntimeLibcall; + def CEIL_#S : RuntimeLibcall; + def COPYSIGN_#S : RuntimeLibcall; + def COS_#S : RuntimeLibcall; + def COSH_#S : RuntimeLibcall; + def COSPI_#S : RuntimeLibcall; + def ERFC_#S : RuntimeLibcall; + def ERF_#S : RuntimeLibcall; + def EXP_#S : RuntimeLibcall; + def EXP_FINITE_#S : RuntimeLibcall; + def EXP10_#S : RuntimeLibcall; + def EXP2_#S : RuntimeLibcall; + def EXPM1_#S : RuntimeLibcall; + def FABS_#S : RuntimeLibcall; + def FDIM_#S : RuntimeLibcall; + def FLOOR_#S : RuntimeLibcall; + def FMA_#S : RuntimeLibcall; + def FMAX_#S : RuntimeLibcall; + def FMIN_#S : RuntimeLibcall; + def FMOD_#S : RuntimeLibcall; + def HYPOT_#S : 
RuntimeLibcall; + def ILOGB_#S : RuntimeLibcall; + def LDEXP_#S : RuntimeLibcall; + def LGAMMA_#S : RuntimeLibcall; + def LOG_#S : RuntimeLibcall; + def LOG10_#S : RuntimeLibcall; + def LOG1P_#S : RuntimeLibcall; + def LOG2_#S : RuntimeLibcall; + def LOGB_#S : RuntimeLibcall; + def MODF_#S : RuntimeLibcall; + def NEXTAFTER_#S : RuntimeLibcall; + def POW_#S : RuntimeLibcall; + def SINCOS_#S : RuntimeLibcall; + def SINCOSPI_#S : RuntimeLibcall; + def SIN_#S : RuntimeLibcall; + def SINH_#S : RuntimeLibcall; + def SINPI_#S : RuntimeLibcall; + def SQRT_#S : RuntimeLibcall; + def TAN_#S : RuntimeLibcall; + def TANH_#S : RuntimeLibcall; + def TGAMMA_#S : RuntimeLibcall; +} + +foreach S = F64VectorSuffixes in { + def LOG_FINITE_#S : RuntimeLibcall; + def POW_FINITE_#S : RuntimeLibcall; } def FEGETENV : RuntimeLibcall; @@ -1089,50 +1142,6 @@ def __security_check_cookie : RuntimeLibcallImpl; def __security_check_cookie_arm64ec : RuntimeLibcallImpl; -//===----------------------------------------------------------------------===// -// sleef calls -//===----------------------------------------------------------------------===// - -defset list SleefLibcalls = { - def _ZGVnN2vl8_modf : RuntimeLibcallImpl; - def _ZGVnN4vl4_modff : RuntimeLibcallImpl; - def _ZGVsNxvl8_modf : RuntimeLibcallImpl; - def _ZGVsNxvl4_modff : RuntimeLibcallImpl; - - def _ZGVnN2vl8l8_sincos : RuntimeLibcallImpl; - def _ZGVnN4vl4l4_sincosf : RuntimeLibcallImpl; - def _ZGVsNxvl8l8_sincos : RuntimeLibcallImpl; - def _ZGVsNxvl4l4_sincosf : RuntimeLibcallImpl; - - def _ZGVnN4vl4l4_sincospif : RuntimeLibcallImpl; - def _ZGVnN2vl8l8_sincospi : RuntimeLibcallImpl; - def _ZGVsNxvl4l4_sincospif : RuntimeLibcallImpl; - def _ZGVsNxvl8l8_sincospi : RuntimeLibcallImpl; -} - -//===----------------------------------------------------------------------===// -// ARMPL calls -//===----------------------------------------------------------------------===// - -defset list ARMPLLibcalls = { - def armpl_vmodfq_f64 : 
RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall - def armpl_vmodfq_f32 : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall - def armpl_svmodf_f64_x : RuntimeLibcallImpl; - def armpl_svmodf_f32_x : RuntimeLibcallImpl; - - def armpl_vsincosq_f64 - : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall - def armpl_vsincosq_f32 - : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall - def armpl_svsincos_f64_x : RuntimeLibcallImpl; - def armpl_svsincos_f32_x : RuntimeLibcallImpl; - - def armpl_vsincospiq_f32 : RuntimeLibcallImpl; - def armpl_vsincospiq_f64 : RuntimeLibcallImpl; - def armpl_svsincospi_f32_x : RuntimeLibcallImpl; - def armpl_svsincospi_f64_x : RuntimeLibcallImpl; -} - //===----------------------------------------------------------------------===// // F128 libm Runtime Libcalls //===----------------------------------------------------------------------===// @@ -2769,3 +2778,926 @@ def LegacyDefaultSystemLibrary LibcallImpls<(add Int128RTLibcalls), isArch64Bit>, DefaultStackProtector )>; + +//===----------------------------------------------------------------------===// +// Vector math libraries +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Accelerate framework functions +//===----------------------------------------------------------------------===// + +defset list ACCELERATE_VECFUNCS = { + def vacosf : RuntimeLibcallImpl; + def vacoshf : RuntimeLibcallImpl; + def vasinf : RuntimeLibcallImpl; + def vasinhf : RuntimeLibcallImpl; + def vatan2f : RuntimeLibcallImpl; + def vatanf : RuntimeLibcallImpl; + def vatanhf : RuntimeLibcallImpl; + def vceilf : RuntimeLibcallImpl; + def vcosf : RuntimeLibcallImpl; + def vcoshf : RuntimeLibcallImpl; + def vexpf : RuntimeLibcallImpl; + def vexpm1f : RuntimeLibcallImpl; + def vfabsf : RuntimeLibcallImpl; + def vfloorf : RuntimeLibcallImpl; + def vlog10f : RuntimeLibcallImpl; + def 
vlog1pf : RuntimeLibcallImpl; + def vlogbf : RuntimeLibcallImpl; + def vlogf : RuntimeLibcallImpl; + def vsinf : RuntimeLibcallImpl; + def vsinhf : RuntimeLibcallImpl; + def vsqrtf : RuntimeLibcallImpl; + def vtanf : RuntimeLibcallImpl; + def vtanhf : RuntimeLibcallImpl; +} + +//===----------------------------------------------------------------------===// +// Darwin_libsystem_m vector functions +//===----------------------------------------------------------------------===// + +defset list DARWIN_LIBSYSTEM_M_VECFUNCS = { + def _simd_acos_d2 : RuntimeLibcallImpl; + def _simd_acos_f4 : RuntimeLibcallImpl; + def _simd_acosh_d2 : RuntimeLibcallImpl; + def _simd_acosh_f4 : RuntimeLibcallImpl; + def _simd_asin_d2 : RuntimeLibcallImpl; + def _simd_asin_f4 : RuntimeLibcallImpl; + def _simd_asinh_d2 : RuntimeLibcallImpl; + def _simd_asinh_f4 : RuntimeLibcallImpl; + def _simd_atan2_d2 : RuntimeLibcallImpl; + def _simd_atan2_f4 : RuntimeLibcallImpl; + def _simd_atan_d2 : RuntimeLibcallImpl; + def _simd_atan_f4 : RuntimeLibcallImpl; + def _simd_atanh_d2 : RuntimeLibcallImpl; + def _simd_atanh_f4 : RuntimeLibcallImpl; + def _simd_cbrt_d2 : RuntimeLibcallImpl; + def _simd_cbrt_f4 : RuntimeLibcallImpl; + def _simd_cos_d2 : RuntimeLibcallImpl; + def _simd_cos_f4 : RuntimeLibcallImpl; + def _simd_cosh_d2 : RuntimeLibcallImpl; + def _simd_cosh_f4 : RuntimeLibcallImpl; + def _simd_erf_d2 : RuntimeLibcallImpl; + def _simd_erf_f4 : RuntimeLibcallImpl; + def _simd_exp_d2 : RuntimeLibcallImpl; + def _simd_exp_f4 : RuntimeLibcallImpl; + def _simd_pow_d2 : RuntimeLibcallImpl; + def _simd_pow_f4 : RuntimeLibcallImpl; + def _simd_sin_d2 : RuntimeLibcallImpl; + def _simd_sin_f4 : RuntimeLibcallImpl; + def _simd_sinh_d2 : RuntimeLibcallImpl; + def _simd_sinh_f4 : RuntimeLibcallImpl; + def _simd_tan_d2 : RuntimeLibcallImpl; + def _simd_tan_f4 : RuntimeLibcallImpl; + def _simd_tanh_d2 : RuntimeLibcallImpl; + def _simd_tanh_f4 : RuntimeLibcallImpl; +} + 
+//===----------------------------------------------------------------------===// +// GLIBC Vector Math library LIBMVEC functions +//===----------------------------------------------------------------------===// + +defvar LIBMVECPrefix = "LIBMVEC_"; + +class LibmvecLibcall + : RuntimeLibcallImpl; + +defset list LIBMVEC_X86_VECFUNCS = { + def LIBMVEC__ZGVbN2v___exp_finite : LibmvecLibcall; + def LIBMVEC__ZGVbN2v___log_finite : LibmvecLibcall; + def LIBMVEC__ZGVbN2v_cos : LibmvecLibcall; + def LIBMVEC__ZGVbN2v_exp : LibmvecLibcall; + def LIBMVEC__ZGVbN2v_log : LibmvecLibcall; + def LIBMVEC__ZGVbN2v_sin : LibmvecLibcall; + def LIBMVEC__ZGVbN2v_tan : LibmvecLibcall; + def LIBMVEC__ZGVbN2vv___pow_finite : LibmvecLibcall; + def LIBMVEC__ZGVbN2vv_pow : LibmvecLibcall; + def LIBMVEC__ZGVbN4v___expf_finite : LibmvecLibcall; + def LIBMVEC__ZGVbN4v___logf_finite : LibmvecLibcall; + def LIBMVEC__ZGVbN4v_cosf : LibmvecLibcall; + def LIBMVEC__ZGVbN4v_expf : LibmvecLibcall; + def LIBMVEC__ZGVbN4v_logf : LibmvecLibcall; + def LIBMVEC__ZGVbN4v_sinf : LibmvecLibcall; + def LIBMVEC__ZGVbN4v_tanf : LibmvecLibcall; + def LIBMVEC__ZGVbN4vv___powf_finite : LibmvecLibcall; + def LIBMVEC__ZGVbN4vv_powf : LibmvecLibcall; + def LIBMVEC__ZGVdN4v___exp_finite : LibmvecLibcall; + def LIBMVEC__ZGVdN4v___log_finite : LibmvecLibcall; + def LIBMVEC__ZGVdN4v_cos : LibmvecLibcall; + def LIBMVEC__ZGVdN4v_exp : LibmvecLibcall; + def LIBMVEC__ZGVdN4v_log : LibmvecLibcall; + def LIBMVEC__ZGVdN4v_sin : LibmvecLibcall; + def LIBMVEC__ZGVdN4v_tan : LibmvecLibcall; + def LIBMVEC__ZGVdN4vv___pow_finite : LibmvecLibcall; + def LIBMVEC__ZGVdN4vv_pow : LibmvecLibcall; + def LIBMVEC__ZGVdN8v___expf_finite : LibmvecLibcall; + def LIBMVEC__ZGVdN8v___logf_finite : LibmvecLibcall; + def LIBMVEC__ZGVdN8v_cosf : LibmvecLibcall; + def LIBMVEC__ZGVdN8v_expf : LibmvecLibcall; + def LIBMVEC__ZGVdN8v_logf : LibmvecLibcall; + def LIBMVEC__ZGVdN8v_sinf : LibmvecLibcall; + def LIBMVEC__ZGVdN8v_tanf : LibmvecLibcall; + def 
LIBMVEC__ZGVdN8vv___powf_finite : LibmvecLibcall; + def LIBMVEC__ZGVdN8vv_powf : LibmvecLibcall; +} + +defset list LIBMVEC_AARCH64_VECFUNCS = { + def LIBMVEC__ZGVnN2v_acos : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_acosf : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_acosh : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_acoshf : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_asin : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_asinf : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_asinh : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_asinhf : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_atan : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_atanf : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_atanh : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_atanhf : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_cbrt : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_cbrtf : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_cos : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_cosf : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_cosh : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_coshf : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_erf : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_erfc : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_erfcf : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_erff : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_exp : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_exp10 : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_exp10f : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_exp2 : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_exp2f : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_expf : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_expm1 : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_expm1f : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_log : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_log10 : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_log10f : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_log1p : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_log1pf : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_log2 : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_log2f : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_logf : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_sin : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_sinf : 
LibmvecLibcall; + def LIBMVEC__ZGVnN2v_sinh : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_sinhf : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_tan : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_tanf : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_tanh : LibmvecLibcall; + def LIBMVEC__ZGVnN2v_tanhf : LibmvecLibcall; + def LIBMVEC__ZGVnN2vv_atan2 : LibmvecLibcall; + def LIBMVEC__ZGVnN2vv_atan2f : LibmvecLibcall; + def LIBMVEC__ZGVnN2vv_hypot : LibmvecLibcall; + def LIBMVEC__ZGVnN2vv_hypotf : LibmvecLibcall; + def LIBMVEC__ZGVnN2vv_pow : LibmvecLibcall; + def LIBMVEC__ZGVnN2vv_powf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_acosf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_acoshf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_asinf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_asinhf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_atanf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_atanhf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_cbrtf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_cosf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_coshf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_erfcf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_erff : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_exp10f : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_exp2f : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_expf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_expm1f : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_log10f : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_log1pf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_log2f : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_logf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_sinf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_sinhf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_tanf : LibmvecLibcall; + def LIBMVEC__ZGVnN4v_tanhf : LibmvecLibcall; + def LIBMVEC__ZGVnN4vv_atan2f : LibmvecLibcall; + def LIBMVEC__ZGVnN4vv_hypotf : LibmvecLibcall; + def LIBMVEC__ZGVnN4vv_powf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_acos : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_acosf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_acosh : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_acoshf : LibmvecLibcall; + 
def LIBMVEC__ZGVsMxv_asin : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_asinf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_asinh : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_asinhf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_atan : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_atanf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_atanh : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_atanhf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_cbrt : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_cbrtf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_cos : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_cosf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_cosh : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_coshf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_erf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_erfc : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_erfcf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_erff : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_exp : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_exp10 : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_exp10f : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_exp2 : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_exp2f : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_expf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_expm1 : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_expm1f : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_log : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_log10 : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_log10f : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_log1p : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_log1pf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_log2 : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_log2f : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_logf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_sin : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_sinf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_sinh : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_sinhf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_tan : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_tanf : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_tanh : LibmvecLibcall; + def LIBMVEC__ZGVsMxv_tanhf : LibmvecLibcall; + def LIBMVEC__ZGVsMxvv_atan2 : LibmvecLibcall; 
+ def LIBMVEC__ZGVsMxvv_atan2f : LibmvecLibcall; + def LIBMVEC__ZGVsMxvv_hypot : LibmvecLibcall; + def LIBMVEC__ZGVsMxvv_hypotf : LibmvecLibcall; + def LIBMVEC__ZGVsMxvv_pow : LibmvecLibcall; + def LIBMVEC__ZGVsMxvv_powf : LibmvecLibcall; +} + +//===----------------------------------------------------------------------===// +// IBM MASS vector library (MASSV) functions +//===----------------------------------------------------------------------===// + +defset list MASSV_VECFUNCS = { + def __acosd2 : RuntimeLibcallImpl; + def __acosf4 : RuntimeLibcallImpl; + def __acoshd2 : RuntimeLibcallImpl; + def __acoshf4 : RuntimeLibcallImpl; + def __asind2 : RuntimeLibcallImpl; + def __asinf4 : RuntimeLibcallImpl; + def __asinhd2 : RuntimeLibcallImpl; + def __asinhf4 : RuntimeLibcallImpl; + def __atan2d2 : RuntimeLibcallImpl; + def __atan2f4 : RuntimeLibcallImpl; + def __atand2 : RuntimeLibcallImpl; + def __atanf4 : RuntimeLibcallImpl; + def __atanhd2 : RuntimeLibcallImpl; + def __atanhf4 : RuntimeLibcallImpl; + def __cbrtd2 : RuntimeLibcallImpl; + def __cbrtf4 : RuntimeLibcallImpl; + def __cosd2 : RuntimeLibcallImpl; + def __cosf4 : RuntimeLibcallImpl; + def __coshd2 : RuntimeLibcallImpl; + def __coshf4 : RuntimeLibcallImpl; + def __exp2d2 : RuntimeLibcallImpl; + def __exp2f4 : RuntimeLibcallImpl; + def __expd2 : RuntimeLibcallImpl; + def __expf4 : RuntimeLibcallImpl; + def __expm1d2 : RuntimeLibcallImpl; + def __expm1f4 : RuntimeLibcallImpl; + def __log10d2 : RuntimeLibcallImpl; + def __log10f4 : RuntimeLibcallImpl; + def __log1pd2 : RuntimeLibcallImpl; + def __log1pf4 : RuntimeLibcallImpl; + def __log2d2 : RuntimeLibcallImpl; + def __log2f4 : RuntimeLibcallImpl; + def __logd2 : RuntimeLibcallImpl; + def __logf4 : RuntimeLibcallImpl; + def __powd2 : RuntimeLibcallImpl; + def __powf4 : RuntimeLibcallImpl; + def __sind2 : RuntimeLibcallImpl; + def __sinf4 : RuntimeLibcallImpl; + def __sinhd2 : RuntimeLibcallImpl; + def __sinhf4 : RuntimeLibcallImpl; + def __tand2 : 
RuntimeLibcallImpl; + def __tanf4 : RuntimeLibcallImpl; + def __tanhd2 : RuntimeLibcallImpl; + def __tanhf4 : RuntimeLibcallImpl; +} + +//===----------------------------------------------------------------------===// +// Intel SVML library functions +//===----------------------------------------------------------------------===// + +defset list SVML_VECFUNCS = { + def __svml_cos2 : RuntimeLibcallImpl; + def __svml_cos4 : RuntimeLibcallImpl; + def __svml_cos8 : RuntimeLibcallImpl; + def __svml_cosf16 : RuntimeLibcallImpl; + def __svml_cosf4 : RuntimeLibcallImpl; + def __svml_cosf8 : RuntimeLibcallImpl; + def __svml_exp2 : RuntimeLibcallImpl; + def __svml_exp22 : RuntimeLibcallImpl; + def __svml_exp24 : RuntimeLibcallImpl; + def __svml_exp28 : RuntimeLibcallImpl; + def __svml_exp2f16 : RuntimeLibcallImpl; + def __svml_exp2f4 : RuntimeLibcallImpl; + def __svml_exp2f8 : RuntimeLibcallImpl; + def __svml_exp4 : RuntimeLibcallImpl; + def __svml_exp8 : RuntimeLibcallImpl; + def __svml_expf16 : RuntimeLibcallImpl; + def __svml_expf4 : RuntimeLibcallImpl; + def __svml_expf8 : RuntimeLibcallImpl; + def __svml_log102 : RuntimeLibcallImpl; + def __svml_log104 : RuntimeLibcallImpl; + def __svml_log108 : RuntimeLibcallImpl; + def __svml_log10f16 : RuntimeLibcallImpl; + def __svml_log10f4 : RuntimeLibcallImpl; + def __svml_log10f8 : RuntimeLibcallImpl; + def __svml_log2 : RuntimeLibcallImpl; + def __svml_log22 : RuntimeLibcallImpl; + def __svml_log24 : RuntimeLibcallImpl; + def __svml_log28 : RuntimeLibcallImpl; + def __svml_log2f16 : RuntimeLibcallImpl; + def __svml_log2f4 : RuntimeLibcallImpl; + def __svml_log2f8 : RuntimeLibcallImpl; + def __svml_log4 : RuntimeLibcallImpl; + def __svml_log8 : RuntimeLibcallImpl; + def __svml_logf16 : RuntimeLibcallImpl; + def __svml_logf4 : RuntimeLibcallImpl; + def __svml_logf8 : RuntimeLibcallImpl; + def __svml_pow2 : RuntimeLibcallImpl; + def __svml_pow4 : RuntimeLibcallImpl; + def __svml_pow8 : RuntimeLibcallImpl; + def __svml_powf16 : 
RuntimeLibcallImpl; + def __svml_powf4 : RuntimeLibcallImpl; + def __svml_powf8 : RuntimeLibcallImpl; + def __svml_sin2 : RuntimeLibcallImpl; + def __svml_sin4 : RuntimeLibcallImpl; + def __svml_sin8 : RuntimeLibcallImpl; + def __svml_sinf16 : RuntimeLibcallImpl; + def __svml_sinf4 : RuntimeLibcallImpl; + def __svml_sinf8 : RuntimeLibcallImpl; + def __svml_sqrt2 : RuntimeLibcallImpl; + def __svml_sqrt4 : RuntimeLibcallImpl; + def __svml_sqrt8 : RuntimeLibcallImpl; + def __svml_sqrtf16 : RuntimeLibcallImpl; + def __svml_sqrtf4 : RuntimeLibcallImpl; + def __svml_sqrtf8 : RuntimeLibcallImpl; + def __svml_tan2 : RuntimeLibcallImpl; + def __svml_tan4 : RuntimeLibcallImpl; + def __svml_tan8 : RuntimeLibcallImpl; + def __svml_tanf16 : RuntimeLibcallImpl; + def __svml_tanf4 : RuntimeLibcallImpl; + def __svml_tanf8 : RuntimeLibcallImpl; +} + +//===----------------------------------------------------------------------===// +// SIMD Library for Evaluating Elementary Functions +//===----------------------------------------------------------------------===// + +defset list SLEEFGNUABI_VF2_VECFUNCS = { + def _ZGVnN2v_acos : RuntimeLibcallImpl; + def _ZGVnN2v_acosh : RuntimeLibcallImpl; + def _ZGVnN2v_asin : RuntimeLibcallImpl; + def _ZGVnN2v_asinh : RuntimeLibcallImpl; + def _ZGVnN2v_atan : RuntimeLibcallImpl; + def _ZGVnN2v_atanh : RuntimeLibcallImpl; + def _ZGVnN2v_cbrt : RuntimeLibcallImpl; + def _ZGVnN2v_cos : RuntimeLibcallImpl; + def _ZGVnN2v_cosh : RuntimeLibcallImpl; + def _ZGVnN2v_cospi : RuntimeLibcallImpl; + def _ZGVnN2v_erf : RuntimeLibcallImpl; + def _ZGVnN2v_erfc : RuntimeLibcallImpl; + def _ZGVnN2v_exp : RuntimeLibcallImpl; + def _ZGVnN2v_exp10 : RuntimeLibcallImpl; + def _ZGVnN2v_exp2 : RuntimeLibcallImpl; + def _ZGVnN2v_expm1 : RuntimeLibcallImpl; + def _ZGVnN2v_ilogb : RuntimeLibcallImpl; + def _ZGVnN2v_lgamma : RuntimeLibcallImpl; + def _ZGVnN2v_log : RuntimeLibcallImpl; + def _ZGVnN2v_log10 : RuntimeLibcallImpl; + def _ZGVnN2v_log1p : RuntimeLibcallImpl; + 
def _ZGVnN2v_log2 : RuntimeLibcallImpl; + def _ZGVnN2v_sin : RuntimeLibcallImpl; + def _ZGVnN2v_sinh : RuntimeLibcallImpl; + def _ZGVnN2v_sinpi : RuntimeLibcallImpl; + def _ZGVnN2v_sqrt : RuntimeLibcallImpl; + def _ZGVnN2v_tan : RuntimeLibcallImpl; + def _ZGVnN2v_tanh : RuntimeLibcallImpl; + def _ZGVnN2v_tgamma : RuntimeLibcallImpl; + def _ZGVnN2vl8_modf : RuntimeLibcallImpl; + def _ZGVnN2vl8l8_sincos : RuntimeLibcallImpl; + def _ZGVnN2vl8l8_sincospi : RuntimeLibcallImpl; + def _ZGVnN2vv_atan2 : RuntimeLibcallImpl; + def _ZGVnN2vv_copysign : RuntimeLibcallImpl; + def _ZGVnN2vv_fdim : RuntimeLibcallImpl; + def _ZGVnN2vv_fmax : RuntimeLibcallImpl; + def _ZGVnN2vv_fmin : RuntimeLibcallImpl; + def _ZGVnN2vv_fmod : RuntimeLibcallImpl; + def _ZGVnN2vv_hypot : RuntimeLibcallImpl; + def _ZGVnN2vv_ldexp : RuntimeLibcallImpl; + def _ZGVnN2vv_nextafter : RuntimeLibcallImpl; + def _ZGVnN2vv_pow : RuntimeLibcallImpl; + def _ZGVnN2vvv_fma : RuntimeLibcallImpl; +} + +defset list SLEEFGNUABI_VF4_VECFUNCS = { + def _ZGVnN4v_acosf : RuntimeLibcallImpl; + def _ZGVnN4v_acoshf : RuntimeLibcallImpl; + def _ZGVnN4v_asinf : RuntimeLibcallImpl; + def _ZGVnN4v_asinhf : RuntimeLibcallImpl; + def _ZGVnN4v_atanf : RuntimeLibcallImpl; + def _ZGVnN4v_atanhf : RuntimeLibcallImpl; + def _ZGVnN4v_cbrtf : RuntimeLibcallImpl; + def _ZGVnN4v_cosf : RuntimeLibcallImpl; + def _ZGVnN4v_coshf : RuntimeLibcallImpl; + def _ZGVnN4v_cospif : RuntimeLibcallImpl; + def _ZGVnN4v_erfcf : RuntimeLibcallImpl; + def _ZGVnN4v_erff : RuntimeLibcallImpl; + def _ZGVnN4v_exp10f : RuntimeLibcallImpl; + def _ZGVnN4v_exp2f : RuntimeLibcallImpl; + def _ZGVnN4v_expf : RuntimeLibcallImpl; + def _ZGVnN4v_expm1f : RuntimeLibcallImpl; + def _ZGVnN4v_ilogbf : RuntimeLibcallImpl; + def _ZGVnN4v_lgammaf : RuntimeLibcallImpl; + def _ZGVnN4v_log10f : RuntimeLibcallImpl; + def _ZGVnN4v_log1pf : RuntimeLibcallImpl; + def _ZGVnN4v_log2f : RuntimeLibcallImpl; + def _ZGVnN4v_logf : RuntimeLibcallImpl; + def _ZGVnN4v_sinf : 
RuntimeLibcallImpl; + def _ZGVnN4v_sinhf : RuntimeLibcallImpl; + def _ZGVnN4v_sinpif : RuntimeLibcallImpl; + def _ZGVnN4v_sqrtf : RuntimeLibcallImpl; + def _ZGVnN4v_tanf : RuntimeLibcallImpl; + def _ZGVnN4v_tanhf : RuntimeLibcallImpl; + def _ZGVnN4v_tgammaf : RuntimeLibcallImpl; + def _ZGVnN4vl4_modff : RuntimeLibcallImpl; + def _ZGVnN4vl4l4_sincosf : RuntimeLibcallImpl; + def _ZGVnN4vl4l4_sincospif : RuntimeLibcallImpl; + def _ZGVnN4vv_atan2f : RuntimeLibcallImpl; + def _ZGVnN4vv_copysignf : RuntimeLibcallImpl; + def _ZGVnN4vv_fdimf : RuntimeLibcallImpl; + def _ZGVnN4vv_fmaxf : RuntimeLibcallImpl; + def _ZGVnN4vv_fminf : RuntimeLibcallImpl; + def _ZGVnN4vv_fmodf : RuntimeLibcallImpl; + def _ZGVnN4vv_hypotf : RuntimeLibcallImpl; + def _ZGVnN4vv_ldexpf : RuntimeLibcallImpl; + def _ZGVnN4vv_nextafterf : RuntimeLibcallImpl; + def _ZGVnN4vv_powf : RuntimeLibcallImpl; + def _ZGVnN4vvv_fmaf : RuntimeLibcallImpl; +} + +defset list SLEEFGNUABI_SCALABLE_VECFUNCS = { + def _ZGVsMxv_acos : RuntimeLibcallImpl; + def _ZGVsMxv_acosf : RuntimeLibcallImpl; + def _ZGVsMxv_acosh : RuntimeLibcallImpl; + def _ZGVsMxv_acoshf : RuntimeLibcallImpl; + def _ZGVsMxv_asin : RuntimeLibcallImpl; + def _ZGVsMxv_asinf : RuntimeLibcallImpl; + def _ZGVsMxv_asinh : RuntimeLibcallImpl; + def _ZGVsMxv_asinhf : RuntimeLibcallImpl; + def _ZGVsMxv_atan : RuntimeLibcallImpl; + def _ZGVsMxv_atanf : RuntimeLibcallImpl; + def _ZGVsMxv_atanh : RuntimeLibcallImpl; + def _ZGVsMxv_atanhf : RuntimeLibcallImpl; + def _ZGVsMxv_cbrt : RuntimeLibcallImpl; + def _ZGVsMxv_cbrtf : RuntimeLibcallImpl; + def _ZGVsMxv_cos : RuntimeLibcallImpl; + def _ZGVsMxv_cosf : RuntimeLibcallImpl; + def _ZGVsMxv_cosh : RuntimeLibcallImpl; + def _ZGVsMxv_coshf : RuntimeLibcallImpl; + def _ZGVsMxv_cospi : RuntimeLibcallImpl; + def _ZGVsMxv_cospif : RuntimeLibcallImpl; + def _ZGVsMxv_erf : RuntimeLibcallImpl; + def _ZGVsMxv_erfc : RuntimeLibcallImpl; + def _ZGVsMxv_erfcf : RuntimeLibcallImpl; + def _ZGVsMxv_erff : RuntimeLibcallImpl; + 
def _ZGVsMxv_exp : RuntimeLibcallImpl; + def _ZGVsMxv_exp10 : RuntimeLibcallImpl; + def _ZGVsMxv_exp10f : RuntimeLibcallImpl; + def _ZGVsMxv_exp2 : RuntimeLibcallImpl; + def _ZGVsMxv_exp2f : RuntimeLibcallImpl; + def _ZGVsMxv_expf : RuntimeLibcallImpl; + def _ZGVsMxv_expm1 : RuntimeLibcallImpl; + def _ZGVsMxv_expm1f : RuntimeLibcallImpl; + def _ZGVsMxv_ilogb : RuntimeLibcallImpl; + def _ZGVsMxv_ilogbf : RuntimeLibcallImpl; + def _ZGVsMxv_lgamma : RuntimeLibcallImpl; + def _ZGVsMxv_lgammaf : RuntimeLibcallImpl; + def _ZGVsMxv_log : RuntimeLibcallImpl; + def _ZGVsMxv_log10 : RuntimeLibcallImpl; + def _ZGVsMxv_log10f : RuntimeLibcallImpl; + def _ZGVsMxv_log1p : RuntimeLibcallImpl; + def _ZGVsMxv_log1pf : RuntimeLibcallImpl; + def _ZGVsMxv_log2 : RuntimeLibcallImpl; + def _ZGVsMxv_log2f : RuntimeLibcallImpl; + def _ZGVsMxv_logf : RuntimeLibcallImpl; + def _ZGVsMxv_sin : RuntimeLibcallImpl; + def _ZGVsMxv_sinf : RuntimeLibcallImpl; + def _ZGVsMxv_sinh : RuntimeLibcallImpl; + def _ZGVsMxv_sinhf : RuntimeLibcallImpl; + def _ZGVsMxv_sinpi : RuntimeLibcallImpl; + def _ZGVsMxv_sinpif : RuntimeLibcallImpl; + def _ZGVsMxv_sqrt : RuntimeLibcallImpl; + def _ZGVsMxv_sqrtf : RuntimeLibcallImpl; + def _ZGVsMxv_tan : RuntimeLibcallImpl; + def _ZGVsMxv_tanf : RuntimeLibcallImpl; + def _ZGVsMxv_tanh : RuntimeLibcallImpl; + def _ZGVsMxv_tanhf : RuntimeLibcallImpl; + def _ZGVsMxv_tgamma : RuntimeLibcallImpl; + def _ZGVsMxv_tgammaf : RuntimeLibcallImpl; + def _ZGVsMxvv_atan2 : RuntimeLibcallImpl; + def _ZGVsMxvv_atan2f : RuntimeLibcallImpl; + def _ZGVsMxvv_copysign : RuntimeLibcallImpl; + def _ZGVsMxvv_copysignf : RuntimeLibcallImpl; + def _ZGVsMxvv_fdim : RuntimeLibcallImpl; + def _ZGVsMxvv_fdimf : RuntimeLibcallImpl; + def _ZGVsMxvv_fmax : RuntimeLibcallImpl; + def _ZGVsMxvv_fmaxf : RuntimeLibcallImpl; + def _ZGVsMxvv_fmin : RuntimeLibcallImpl; + def _ZGVsMxvv_fminf : RuntimeLibcallImpl; + def _ZGVsMxvv_fmod : RuntimeLibcallImpl; + def _ZGVsMxvv_fmodf : RuntimeLibcallImpl; + def 
_ZGVsMxvv_hypot : RuntimeLibcallImpl; + def _ZGVsMxvv_hypotf : RuntimeLibcallImpl; + def _ZGVsMxvv_ldexp : RuntimeLibcallImpl; + def _ZGVsMxvv_ldexpf : RuntimeLibcallImpl; + def _ZGVsMxvv_nextafter : RuntimeLibcallImpl; + def _ZGVsMxvv_nextafterf : RuntimeLibcallImpl; + def _ZGVsMxvv_pow : RuntimeLibcallImpl; + def _ZGVsMxvv_powf : RuntimeLibcallImpl; + def _ZGVsMxvvv_fma : RuntimeLibcallImpl; + def _ZGVsMxvvv_fmaf : RuntimeLibcallImpl; + def _ZGVsNxvl8_modf : RuntimeLibcallImpl; + def _ZGVsNxvl4_modff : RuntimeLibcallImpl; + def _ZGVsNxvl4l4_sincosf : RuntimeLibcallImpl; + def _ZGVsNxvl4l4_sincospif : RuntimeLibcallImpl; + def _ZGVsNxvl8l8_sincos : RuntimeLibcallImpl; + def _ZGVsNxvl8l8_sincospi : RuntimeLibcallImpl; +} + +defset list SLEEFGNUABI_SCALABLE_VECFUNCS_RISCV = { + def Sleef_acosdx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_acosfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_acoshdx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_acoshfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_asindx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_asinfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_asinhdx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_asinhfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_atan2dx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_atan2fx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_atandx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_atanfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_atanhdx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_atanhfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_cbrtdx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_cbrtfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_copysigndx_rvvm2 : RuntimeLibcallImpl; + def Sleef_copysignfx_rvvm2 : RuntimeLibcallImpl; + def Sleef_cosdx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_cosfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_coshdx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_coshfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_cospidx_u05rvvm2 : RuntimeLibcallImpl; + def Sleef_cospifx_u05rvvm2 : RuntimeLibcallImpl; + 
def Sleef_erfcdx_u15rvvm2 : RuntimeLibcallImpl; + def Sleef_erfcfx_u15rvvm2 : RuntimeLibcallImpl; + def Sleef_erfdx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_erffx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_exp10dx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_exp10fx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_exp2dx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_exp2fx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_expdx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_expfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_expm1dx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_expm1fx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_fdimdx_rvvm2 : RuntimeLibcallImpl; + def Sleef_fdimfx_rvvm2 : RuntimeLibcallImpl; + def Sleef_fmadx_rvvm2 : RuntimeLibcallImpl; + def Sleef_fmafx_rvvm2 : RuntimeLibcallImpl; + def Sleef_fmaxdx_rvvm2 : RuntimeLibcallImpl; + def Sleef_fmaxfx_rvvm2 : RuntimeLibcallImpl; + def Sleef_fmindx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_fminfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_fmoddx_rvvm2 : RuntimeLibcallImpl; + def Sleef_fmodfx_rvvm2 : RuntimeLibcallImpl; + def Sleef_hypotdx_u05rvvm2 : RuntimeLibcallImpl; + def Sleef_hypotfx_u05rvvm2 : RuntimeLibcallImpl; + def Sleef_ilogbdx_rvvm2 : RuntimeLibcallImpl; + def Sleef_ilogbfx_rvvm2 : RuntimeLibcallImpl; + def Sleef_ldexpdx_rvvm2 : RuntimeLibcallImpl; + def Sleef_ldexpfx_rvvm2 : RuntimeLibcallImpl; + def Sleef_lgammadx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_lgammafx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_log10dx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_log10fx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_log1pdx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_log1pfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_log2dx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_log2fx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_logdx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_logfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_modfdx_rvvm2 : RuntimeLibcallImpl; + def Sleef_modffx_rvvm2 : RuntimeLibcallImpl; + def Sleef_nextafterdx_rvvm2 : 
RuntimeLibcallImpl; + def Sleef_nextafterfx_rvvm2 : RuntimeLibcallImpl; + def Sleef_powdx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_powfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_sincosdx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_sincosfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_sincospidx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_sincospifx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_sindx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_sinfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_sinhdx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_sinhfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_sinpidx_u05rvvm2 : RuntimeLibcallImpl; + def Sleef_sinpifx_u05rvvm2 : RuntimeLibcallImpl; + def Sleef_sqrtdx_u05rvvm2 : RuntimeLibcallImpl; + def Sleef_sqrtfx_u05rvvm2 : RuntimeLibcallImpl; + def Sleef_tandx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_tanfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_tanhdx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_tanhfx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_tgammadx_u10rvvm2 : RuntimeLibcallImpl; + def Sleef_tgammafx_u10rvvm2 : RuntimeLibcallImpl; +} + +//===----------------------------------------------------------------------===// +// Arm Performance Libraries (ARMPL) functions +//===----------------------------------------------------------------------===// + +defset list ARMPL_VECFUNCS = { + def armpl_svacos_f32_x : RuntimeLibcallImpl; + def armpl_svacos_f64_x : RuntimeLibcallImpl; + def armpl_svacosh_f32_x : RuntimeLibcallImpl; + def armpl_svacosh_f64_x : RuntimeLibcallImpl; + def armpl_svasin_f32_x : RuntimeLibcallImpl; + def armpl_svasin_f64_x : RuntimeLibcallImpl; + def armpl_svasinh_f32_x : RuntimeLibcallImpl; + def armpl_svasinh_f64_x : RuntimeLibcallImpl; + def armpl_svatan2_f32_x : RuntimeLibcallImpl; + def armpl_svatan2_f64_x : RuntimeLibcallImpl; + def armpl_svatan_f32_x : RuntimeLibcallImpl; + def armpl_svatan_f64_x : RuntimeLibcallImpl; + def armpl_svatanh_f32_x : RuntimeLibcallImpl; + def armpl_svatanh_f64_x : 
RuntimeLibcallImpl; + def armpl_svcbrt_f32_x : RuntimeLibcallImpl; + def armpl_svcbrt_f64_x : RuntimeLibcallImpl; + def armpl_svcopysign_f32_x : RuntimeLibcallImpl; + def armpl_svcopysign_f64_x : RuntimeLibcallImpl; + def armpl_svcos_f32_x : RuntimeLibcallImpl; + def armpl_svcos_f64_x : RuntimeLibcallImpl; + def armpl_svcosh_f32_x : RuntimeLibcallImpl; + def armpl_svcosh_f64_x : RuntimeLibcallImpl; + def armpl_svcospi_f32_x : RuntimeLibcallImpl; + def armpl_svcospi_f64_x : RuntimeLibcallImpl; + def armpl_sverf_f32_x : RuntimeLibcallImpl; + def armpl_sverf_f64_x : RuntimeLibcallImpl; + def armpl_sverfc_f32_x : RuntimeLibcallImpl; + def armpl_sverfc_f64_x : RuntimeLibcallImpl; + def armpl_svexp10_f32_x : RuntimeLibcallImpl; + def armpl_svexp10_f64_x : RuntimeLibcallImpl; + def armpl_svexp2_f32_x : RuntimeLibcallImpl; + def armpl_svexp2_f64_x : RuntimeLibcallImpl; + def armpl_svexp_f32_x : RuntimeLibcallImpl; + def armpl_svexp_f64_x : RuntimeLibcallImpl; + def armpl_svexpm1_f32_x : RuntimeLibcallImpl; + def armpl_svexpm1_f64_x : RuntimeLibcallImpl; + def armpl_svfdim_f32_x : RuntimeLibcallImpl; + def armpl_svfdim_f64_x : RuntimeLibcallImpl; + def armpl_svfma_f32_x : RuntimeLibcallImpl; + def armpl_svfma_f64_x : RuntimeLibcallImpl; + def armpl_svfmax_f32_x : RuntimeLibcallImpl; + def armpl_svfmax_f64_x : RuntimeLibcallImpl; + def armpl_svfmin_f32_x : RuntimeLibcallImpl; + def armpl_svfmin_f64_x : RuntimeLibcallImpl; + def armpl_svfmod_f32_x : RuntimeLibcallImpl; + def armpl_svfmod_f64_x : RuntimeLibcallImpl; + def armpl_svhypot_f32_x : RuntimeLibcallImpl; + def armpl_svhypot_f64_x : RuntimeLibcallImpl; + def armpl_svilogb_f32_x : RuntimeLibcallImpl; + def armpl_svilogb_f64_x : RuntimeLibcallImpl; + def armpl_svldexp_f32_x : RuntimeLibcallImpl; + def armpl_svldexp_f64_x : RuntimeLibcallImpl; + def armpl_svlgamma_f32_x : RuntimeLibcallImpl; + def armpl_svlgamma_f64_x : RuntimeLibcallImpl; + def armpl_svlog10_f32_x : RuntimeLibcallImpl; + def armpl_svlog10_f64_x : 
RuntimeLibcallImpl; + def armpl_svlog1p_f32_x : RuntimeLibcallImpl; + def armpl_svlog1p_f64_x : RuntimeLibcallImpl; + def armpl_svlog2_f32_x : RuntimeLibcallImpl; + def armpl_svlog2_f64_x : RuntimeLibcallImpl; + def armpl_svlog_f32_x : RuntimeLibcallImpl; + def armpl_svlog_f64_x : RuntimeLibcallImpl; + def armpl_svmodf_f32_x : RuntimeLibcallImpl; + def armpl_svmodf_f64_x : RuntimeLibcallImpl; + def armpl_svnextafter_f32_x : RuntimeLibcallImpl; + def armpl_svnextafter_f64_x : RuntimeLibcallImpl; + def armpl_svpow_f32_x : RuntimeLibcallImpl; + def armpl_svpow_f64_x : RuntimeLibcallImpl; + def armpl_svsin_f32_x : RuntimeLibcallImpl; + def armpl_svsin_f64_x : RuntimeLibcallImpl; + def armpl_svsincos_f32_x : RuntimeLibcallImpl; + def armpl_svsincos_f64_x : RuntimeLibcallImpl; + def armpl_svsincospi_f32_x : RuntimeLibcallImpl; + def armpl_svsincospi_f64_x : RuntimeLibcallImpl; + def armpl_svsinh_f32_x : RuntimeLibcallImpl; + def armpl_svsinh_f64_x : RuntimeLibcallImpl; + def armpl_svsinpi_f32_x : RuntimeLibcallImpl; + def armpl_svsinpi_f64_x : RuntimeLibcallImpl; + def armpl_svsqrt_f32_x : RuntimeLibcallImpl; + def armpl_svsqrt_f64_x : RuntimeLibcallImpl; + def armpl_svtan_f32_x : RuntimeLibcallImpl; + def armpl_svtan_f64_x : RuntimeLibcallImpl; + def armpl_svtanh_f32_x : RuntimeLibcallImpl; + def armpl_svtanh_f64_x : RuntimeLibcallImpl; + def armpl_svtgamma_f32_x : RuntimeLibcallImpl; + def armpl_svtgamma_f64_x : RuntimeLibcallImpl; + def armpl_vacoshq_f32 : RuntimeLibcallImpl; + def armpl_vacoshq_f64 : RuntimeLibcallImpl; + def armpl_vacosq_f32 : RuntimeLibcallImpl; + def armpl_vacosq_f64 : RuntimeLibcallImpl; + def armpl_vasinhq_f32 : RuntimeLibcallImpl; + def armpl_vasinhq_f64 : RuntimeLibcallImpl; + def armpl_vasinq_f32 : RuntimeLibcallImpl; + def armpl_vasinq_f64 : RuntimeLibcallImpl; + def armpl_vatan2q_f32 : RuntimeLibcallImpl; + def armpl_vatan2q_f64 : RuntimeLibcallImpl; + def armpl_vatanhq_f32 : RuntimeLibcallImpl; + def armpl_vatanhq_f64 : RuntimeLibcallImpl; 
+ def armpl_vatanq_f32 : RuntimeLibcallImpl; + def armpl_vatanq_f64 : RuntimeLibcallImpl; + def armpl_vcbrtq_f32 : RuntimeLibcallImpl; + def armpl_vcbrtq_f64 : RuntimeLibcallImpl; + def armpl_vcopysignq_f32 : RuntimeLibcallImpl; + def armpl_vcopysignq_f64 : RuntimeLibcallImpl; + def armpl_vcoshq_f32 : RuntimeLibcallImpl; + def armpl_vcoshq_f64 : RuntimeLibcallImpl; + def armpl_vcospiq_f32 : RuntimeLibcallImpl; + def armpl_vcospiq_f64 : RuntimeLibcallImpl; + def armpl_vcosq_f32 : RuntimeLibcallImpl; + def armpl_vcosq_f64 : RuntimeLibcallImpl; + def armpl_verfcq_f32 : RuntimeLibcallImpl; + def armpl_verfcq_f64 : RuntimeLibcallImpl; + def armpl_verfq_f32 : RuntimeLibcallImpl; + def armpl_verfq_f64 : RuntimeLibcallImpl; + def armpl_vexp10q_f32 : RuntimeLibcallImpl; + def armpl_vexp10q_f64 : RuntimeLibcallImpl; + def armpl_vexp2q_f32 : RuntimeLibcallImpl; + def armpl_vexp2q_f64 : RuntimeLibcallImpl; + def armpl_vexpm1q_f32 : RuntimeLibcallImpl; + def armpl_vexpm1q_f64 : RuntimeLibcallImpl; + def armpl_vexpq_f32 : RuntimeLibcallImpl; + def armpl_vexpq_f64 : RuntimeLibcallImpl; + def armpl_vfdimq_f32 : RuntimeLibcallImpl; + def armpl_vfdimq_f64 : RuntimeLibcallImpl; + def armpl_vfmaq_f32 : RuntimeLibcallImpl; + def armpl_vfmaq_f64 : RuntimeLibcallImpl; + def armpl_vfmaxq_f32 : RuntimeLibcallImpl; + def armpl_vfmaxq_f64 : RuntimeLibcallImpl; + def armpl_vfminq_f32 : RuntimeLibcallImpl; + def armpl_vfminq_f64 : RuntimeLibcallImpl; + def armpl_vfmodq_f32 : RuntimeLibcallImpl; + def armpl_vfmodq_f64 : RuntimeLibcallImpl; + def armpl_vhypotq_f32 : RuntimeLibcallImpl; + def armpl_vhypotq_f64 : RuntimeLibcallImpl; + def armpl_vilogbq_f32 : RuntimeLibcallImpl; + def armpl_vilogbq_f64 : RuntimeLibcallImpl; + def armpl_vldexpq_f32 : RuntimeLibcallImpl; + def armpl_vldexpq_f64 : RuntimeLibcallImpl; + def armpl_vlgammaq_f32 : RuntimeLibcallImpl; + def armpl_vlgammaq_f64 : RuntimeLibcallImpl; + def armpl_vlog10q_f32 : RuntimeLibcallImpl; + def armpl_vlog10q_f64 : RuntimeLibcallImpl; + 
def armpl_vlog1pq_f32 : RuntimeLibcallImpl; + def armpl_vlog1pq_f64 : RuntimeLibcallImpl; + def armpl_vlog2q_f32 : RuntimeLibcallImpl; + def armpl_vlog2q_f64 : RuntimeLibcallImpl; + def armpl_vlogq_f32 : RuntimeLibcallImpl; + def armpl_vlogq_f64 : RuntimeLibcallImpl; + def armpl_vmodfq_f32 : RuntimeLibcallImpl; + def armpl_vmodfq_f64 : RuntimeLibcallImpl; + def armpl_vnextafterq_f32 : RuntimeLibcallImpl; + def armpl_vnextafterq_f64 : RuntimeLibcallImpl; + def armpl_vpowq_f32 : RuntimeLibcallImpl; + def armpl_vpowq_f64 : RuntimeLibcallImpl; + def armpl_vsincospiq_f32 : RuntimeLibcallImpl; + def armpl_vsincospiq_f64 : RuntimeLibcallImpl; + def armpl_vsincosq_f32 : RuntimeLibcallImpl; + def armpl_vsincosq_f64 : RuntimeLibcallImpl; + def armpl_vsinhq_f32 : RuntimeLibcallImpl; + def armpl_vsinhq_f64 : RuntimeLibcallImpl; + def armpl_vsinpiq_f32 : RuntimeLibcallImpl; + def armpl_vsinpiq_f64 : RuntimeLibcallImpl; + def armpl_vsinq_f32 : RuntimeLibcallImpl; + def armpl_vsinq_f64 : RuntimeLibcallImpl; + def armpl_vsqrtq_f32 : RuntimeLibcallImpl; + def armpl_vsqrtq_f64 : RuntimeLibcallImpl; + def armpl_vtanhq_f32 : RuntimeLibcallImpl; + def armpl_vtanhq_f64 : RuntimeLibcallImpl; + def armpl_vtanq_f32 : RuntimeLibcallImpl; + def armpl_vtanq_f64 : RuntimeLibcallImpl; + def armpl_vtgammaq_f32 : RuntimeLibcallImpl; + def armpl_vtgammaq_f64 : RuntimeLibcallImpl; +} + +//===----------------------------------------------------------------------===// +// AMD vector math library (AMDLIBM) functions +//===----------------------------------------------------------------------===// + +defset list AMDLIBM_VECFUNCS = { + def amd_vrd2_atan : RuntimeLibcallImpl; + def amd_vrd2_cbrt : RuntimeLibcallImpl; + def amd_vrd2_cos : RuntimeLibcallImpl; + def amd_vrd2_erf : RuntimeLibcallImpl; + def amd_vrd2_exp : RuntimeLibcallImpl; + def amd_vrd2_exp10 : RuntimeLibcallImpl; + def amd_vrd2_exp2 : RuntimeLibcallImpl; + def amd_vrd2_expm1 : RuntimeLibcallImpl; + def amd_vrd2_log : RuntimeLibcallImpl; 
+ def amd_vrd2_log10 : RuntimeLibcallImpl; + def amd_vrd2_log1p : RuntimeLibcallImpl; + def amd_vrd2_log2 : RuntimeLibcallImpl; + def amd_vrd2_pow : RuntimeLibcallImpl; + def amd_vrd2_sin : RuntimeLibcallImpl; + def amd_vrd2_tan : RuntimeLibcallImpl; + def amd_vrd4_atan : RuntimeLibcallImpl; + def amd_vrd4_cos : RuntimeLibcallImpl; + def amd_vrd4_erf : RuntimeLibcallImpl; + def amd_vrd4_exp : RuntimeLibcallImpl; + def amd_vrd4_exp2 : RuntimeLibcallImpl; + def amd_vrd4_log : RuntimeLibcallImpl; + def amd_vrd4_log2 : RuntimeLibcallImpl; + def amd_vrd4_pow : RuntimeLibcallImpl; + def amd_vrd4_sin : RuntimeLibcallImpl; + def amd_vrd4_sincos : RuntimeLibcallImpl; + def amd_vrd4_tan : RuntimeLibcallImpl; + def amd_vrd8_asin : RuntimeLibcallImpl; + def amd_vrd8_atan : RuntimeLibcallImpl; + def amd_vrd8_cos : RuntimeLibcallImpl; + def amd_vrd8_erf : RuntimeLibcallImpl; + def amd_vrd8_exp : RuntimeLibcallImpl; + def amd_vrd8_exp2 : RuntimeLibcallImpl; + def amd_vrd8_log : RuntimeLibcallImpl; + def amd_vrd8_log2 : RuntimeLibcallImpl; + def amd_vrd8_pow : RuntimeLibcallImpl; + def amd_vrd8_sin : RuntimeLibcallImpl; + def amd_vrd8_sincos : RuntimeLibcallImpl; + def amd_vrd8_tan : RuntimeLibcallImpl; + def amd_vrs16_acosf : RuntimeLibcallImpl; + def amd_vrs16_asinf : RuntimeLibcallImpl; + def amd_vrs16_atanf : RuntimeLibcallImpl; + def amd_vrs16_cosf : RuntimeLibcallImpl; + def amd_vrs16_erff : RuntimeLibcallImpl; + def amd_vrs16_exp2f : RuntimeLibcallImpl; + def amd_vrs16_expf : RuntimeLibcallImpl; + def amd_vrs16_log10f : RuntimeLibcallImpl; + def amd_vrs16_log2f : RuntimeLibcallImpl; + def amd_vrs16_logf : RuntimeLibcallImpl; + def amd_vrs16_powf : RuntimeLibcallImpl; + def amd_vrs16_sincosf : RuntimeLibcallImpl; + def amd_vrs16_sinf : RuntimeLibcallImpl; + def amd_vrs16_tanf : RuntimeLibcallImpl; + def amd_vrs16_tanhf : RuntimeLibcallImpl; + def amd_vrs4_acosf : RuntimeLibcallImpl; + def amd_vrs4_asinf : RuntimeLibcallImpl; + def amd_vrs4_atanf : RuntimeLibcallImpl; + def 
amd_vrs4_cbrtf : RuntimeLibcallImpl; + def amd_vrs4_cosf : RuntimeLibcallImpl; + def amd_vrs4_coshf : RuntimeLibcallImpl; + def amd_vrs4_erff : RuntimeLibcallImpl; + def amd_vrs4_exp10f : RuntimeLibcallImpl; + def amd_vrs4_exp2f : RuntimeLibcallImpl; + def amd_vrs4_expf : RuntimeLibcallImpl; + def amd_vrs4_expm1f : RuntimeLibcallImpl; + def amd_vrs4_log10f : RuntimeLibcallImpl; + def amd_vrs4_log1pf : RuntimeLibcallImpl; + def amd_vrs4_log2f : RuntimeLibcallImpl; + def amd_vrs4_logf : RuntimeLibcallImpl; + def amd_vrs4_powf : RuntimeLibcallImpl; + def amd_vrs4_sincosf : RuntimeLibcallImpl; + def amd_vrs4_sinf : RuntimeLibcallImpl; + def amd_vrs4_tanf : RuntimeLibcallImpl; + def amd_vrs4_tanhf : RuntimeLibcallImpl; + def amd_vrs8_acosf : RuntimeLibcallImpl; + def amd_vrs8_asinf : RuntimeLibcallImpl; + def amd_vrs8_atanf : RuntimeLibcallImpl; + def amd_vrs8_cosf : RuntimeLibcallImpl; + def amd_vrs8_coshf : RuntimeLibcallImpl; + def amd_vrs8_erff : RuntimeLibcallImpl; + def amd_vrs8_exp2f : RuntimeLibcallImpl; + def amd_vrs8_expf : RuntimeLibcallImpl; + def amd_vrs8_log10f : RuntimeLibcallImpl; + def amd_vrs8_log2f : RuntimeLibcallImpl; + def amd_vrs8_logf : RuntimeLibcallImpl; + def amd_vrs8_powf : RuntimeLibcallImpl; + def amd_vrs8_sincosf : RuntimeLibcallImpl; + def amd_vrs8_sinf : RuntimeLibcallImpl; + def amd_vrs8_tanf : RuntimeLibcallImpl; + def amd_vrs8_tanhf : RuntimeLibcallImpl; +} diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 3ed84af6a8717..99d14a60c6ed1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4842,7 +4842,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS ? 
RTLIB::getSINCOS(VT) : RTLIB::getSINCOSPI(VT); - bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results); + bool Expanded = TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results); if (!Expanded) { DAG.getContext()->emitError(Twine("no libcall available for ") + Node->getOperationName(&DAG)); @@ -4940,7 +4940,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { EVT VT = Node->getValueType(0); RTLIB::Libcall LC = Node->getOpcode() == ISD::FMODF ? RTLIB::getMODF(VT) : RTLIB::getFREXP(VT); - bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, + bool Expanded = TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results, /*CallRetResNo=*/0); if (!Expanded) llvm_unreachable("Expected scalar FFREXP/FMODF to expand to libcall!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 58983cb57d7f6..383a025a4d916 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1726,7 +1726,7 @@ void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults( SDNode *N, RTLIB::Libcall LC, std::optional CallRetResNo) { assert(!N->isStrictFPOpcode() && "strictfp not implemented"); SmallVector Results; - DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo); + TLI.expandMultipleResultFPLibCall(DAG, LC, N, Results, CallRetResNo); for (auto [ResNo, Res] : enumerate(Results)) { SDValue Lo, Hi; GetPairElements(Res, Lo, Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index c55e55df373e9..7d979caa8bf82 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1275,7 +1275,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { ? 
RTLIB::getSINCOS(VT) : RTLIB::getSINCOSPI(VT); if (LC != RTLIB::UNKNOWN_LIBCALL && - DAG.expandMultipleResultFPLibCall(LC, Node, Results)) + TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results)) return; // TODO: Try to see if there's a narrower call available to use before @@ -1286,7 +1286,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { EVT VT = Node->getValueType(0); RTLIB::Libcall LC = RTLIB::getMODF(VT); if (LC != RTLIB::UNKNOWN_LIBCALL && - DAG.expandMultipleResultFPLibCall(LC, Node, Results, + TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results, /*CallRetResNo=*/0)) return; break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f05266967fb68..363c71d84694f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2467,167 +2467,6 @@ SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { return getZExtOrTrunc(Op, SDLoc(Op), ShTy); } -/// Given a store node \p StoreNode, return true if it is safe to fold that node -/// into \p FPNode, which expands to a library call with output pointers. -static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, - SDNode *FPNode) { - SmallVector Worklist; - SmallVector DeferredNodes; - SmallPtrSet Visited; - - // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode). - for (SDValue Op : StoreNode->ops()) - if (Op.getNode() != FPNode) - Worklist.push_back(Op.getNode()); - - unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); - while (!Worklist.empty()) { - const SDNode *Node = Worklist.pop_back_val(); - auto [_, Inserted] = Visited.insert(Node); - if (!Inserted) - continue; - - if (MaxSteps > 0 && Visited.size() >= MaxSteps) - return false; - - // Reached the FPNode (would result in a cycle). - // OR Reached CALLSEQ_START (would result in nested call sequences). 
- if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START) - return false; - - if (Node->getOpcode() == ISD::CALLSEQ_END) { - // Defer looking into call sequences (so we can check we're outside one). - // We still need to look through these for the predecessor check. - DeferredNodes.push_back(Node); - continue; - } - - for (SDValue Op : Node->ops()) - Worklist.push_back(Op.getNode()); - } - - // True if we're outside a call sequence and don't have the FPNode as a - // predecessor. No cycles or nested call sequences possible. - return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes, - MaxSteps); -} - -bool SelectionDAG::expandMultipleResultFPLibCall( - RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl &Results, - std::optional CallRetResNo) { - if (LC == RTLIB::UNKNOWN_LIBCALL) - return false; - - RTLIB::LibcallImpl LibcallImpl = TLI->getLibcallImpl(LC); - if (LibcallImpl == RTLIB::Unsupported) - return false; - - LLVMContext &Ctx = *getContext(); - EVT VT = Node->getValueType(0); - unsigned NumResults = Node->getNumValues(); - - // Find users of the node that store the results (and share input chains). The - // destination pointers can be used instead of creating stack allocations. - SDValue StoresInChain; - SmallVector ResultStores(NumResults); - for (SDNode *User : Node->users()) { - if (!ISD::isNormalStore(User)) - continue; - auto *ST = cast(User); - SDValue StoreValue = ST->getValue(); - unsigned ResNo = StoreValue.getResNo(); - // Ensure the store corresponds to an output pointer. - if (CallRetResNo == ResNo) - continue; - // Ensure the store to the default address space and not atomic or volatile. - if (!ST->isSimple() || ST->getAddressSpace() != 0) - continue; - // Ensure all store chains are the same (so they don't alias). - if (StoresInChain && ST->getChain() != StoresInChain) - continue; - // Ensure the store is properly aligned. 
- Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx); - if (ST->getAlign() < - getDataLayout().getABITypeAlign(StoreType->getScalarType())) - continue; - // Avoid: - // 1. Creating cyclic dependencies. - // 2. Expanding the node to a call within a call sequence. - if (!canFoldStoreIntoLibCallOutputPointers(ST, Node)) - continue; - ResultStores[ResNo] = ST; - StoresInChain = ST->getChain(); - } - - TargetLowering::ArgListTy Args; - - // Pass the arguments. - for (const SDValue &Op : Node->op_values()) { - EVT ArgVT = Op.getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(Ctx); - Args.emplace_back(Op, ArgTy); - } - - // Pass the output pointers. - SmallVector ResultPtrs(NumResults); - Type *PointerTy = PointerType::getUnqual(Ctx); - for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) { - if (ResNo == CallRetResNo) - continue; - EVT ResVT = Node->getValueType(ResNo); - SDValue ResultPtr = ST ? ST->getBasePtr() : CreateStackTemporary(ResVT); - ResultPtrs[ResNo] = ResultPtr; - Args.emplace_back(ResultPtr, PointerTy); - } - - SDLoc DL(Node); - - if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) { - // Pass the vector mask (if required). - EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), Ctx, VT); - SDValue Mask = getBoolConstant(true, DL, MaskVT, VT); - Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx)); - } - - Type *RetType = CallRetResNo.has_value() - ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx) - : Type::getVoidTy(Ctx); - SDValue InChain = StoresInChain ? 
StoresInChain : getEntryNode();
-  SDValue Callee =
-      getExternalSymbol(TLI->getLibcallImplName(LibcallImpl).data(),
-                        TLI->getPointerTy(getDataLayout()));
-  TargetLowering::CallLoweringInfo CLI(*this);
-  CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
-      TLI->getLibcallImplCallingConv(LibcallImpl), RetType, Callee,
-      std::move(Args));
-
-  auto [Call, CallChain] = TLI->LowerCallTo(CLI);
-
-  for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
-    if (ResNo == CallRetResNo) {
-      Results.push_back(Call);
-      continue;
-    }
-    MachinePointerInfo PtrInfo;
-    SDValue LoadResult =
-        getLoad(Node->getValueType(ResNo), DL, CallChain, ResultPtr, PtrInfo);
-    SDValue OutChain = LoadResult.getValue(1);
-
-    if (StoreSDNode *ST = ResultStores[ResNo]) {
-      // Replace store with the library call.
-      ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
-      PtrInfo = ST->getPointerInfo();
-    } else {
-      PtrInfo = MachinePointerInfo::getFixedStack(
-          getMachineFunction(), cast(ResultPtr)->getIndex());
-    }
-
-    Results.push_back(LoadResult);
-  }
-
-  return true;
-}
-
 SDValue SelectionDAG::expandVAArg(SDNode *Node) {
   SDLoc dl(Node);
   const TargetLowering &TLI = getTargetLoweringInfo();
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b51d6649af2ec..bb64f4ee70280 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -12126,6 +12126,173 @@ SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
   return Subvectors[0];
 }
 
+/// Given a store node \p StoreNode, return true if it is safe to fold that node
+/// into \p FPNode, which expands to a library call with output pointers.
+static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode,
+                                                  SDNode *FPNode) {
+  SmallVector<const SDNode *, 8> Worklist;
+  SmallVector<const SDNode *, 8> DeferredNodes;
+  SmallPtrSet<const SDNode *, 16> Visited;
+
+  // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
+  for (SDValue Op : StoreNode->ops())
+    if (Op.getNode() != FPNode)
+      Worklist.push_back(Op.getNode());
+
+  unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
+  while (!Worklist.empty()) {
+    const SDNode *Node = Worklist.pop_back_val();
+    auto [_, Inserted] = Visited.insert(Node);
+    if (!Inserted)
+      continue;
+
+    if (MaxSteps > 0 && Visited.size() >= MaxSteps)
+      return false;
+
+    // Reached the FPNode (would result in a cycle).
+    // OR Reached CALLSEQ_START (would result in nested call sequences).
+    if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
+      return false;
+
+    if (Node->getOpcode() == ISD::CALLSEQ_END) {
+      // Defer looking into call sequences (so we can check we're outside one).
+      // We still need to look through these for the predecessor check.
+      DeferredNodes.push_back(Node);
+      continue;
+    }
+
+    for (SDValue Op : Node->ops())
+      Worklist.push_back(Op.getNode());
+  }
+
+  // True if we're outside a call sequence and don't have the FPNode as a
+  // predecessor. No cycles or nested call sequences possible.
+  return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
+                                       MaxSteps);
+}
+
+/// Expands \p Node into a call to the implementation of libcall \p LC. The
+/// call takes every operand of \p Node followed by one output pointer per
+/// result; the result selected by \p CallRetResNo (if any) comes from the
+/// call's return value instead. On success the expanded values are appended to
+/// \p Results in result order. Returns false before modifying \p DAG if \p LC
+/// is unknown or has no supported implementation.
+bool TargetLowering::expandMultipleResultFPLibCall(
+    SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
+    SmallVectorImpl<SDValue> &Results,
+    std::optional<unsigned> CallRetResNo) const {
+  if (LC == RTLIB::UNKNOWN_LIBCALL)
+    return false;
+
+  RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
+  if (LibcallImpl == RTLIB::Unsupported)
+    return false;
+
+  LLVMContext &Ctx = *DAG.getContext();
+  EVT VT = Node->getValueType(0);
+  unsigned NumResults = Node->getNumValues();
+
+  // Find users of the node that store the results (and share input chains). The
+  // destination pointers can be used instead of creating stack allocations.
+  SDValue StoresInChain;
+  SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
+  for (SDNode *User : Node->users()) {
+    if (!ISD::isNormalStore(User))
+      continue;
+    auto *ST = cast<StoreSDNode>(User);
+    SDValue StoreValue = ST->getValue();
+    unsigned ResNo = StoreValue.getResNo();
+    // Ensure the store corresponds to an output pointer.
+    if (CallRetResNo == ResNo)
+      continue;
+    // Ensure the store to the default address space and not atomic or volatile.
+    if (!ST->isSimple() || ST->getAddressSpace() != 0)
+      continue;
+    // Ensure all store chains are the same (so they don't alias).
+    if (StoresInChain && ST->getChain() != StoresInChain)
+      continue;
+    // Ensure the store is properly aligned.
+    Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
+    if (ST->getAlign() <
+        DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType()))
+      continue;
+    // Avoid:
+    //  1. Creating cyclic dependencies.
+    //  2. Expanding the node to a call within a call sequence.
+    if (!canFoldStoreIntoLibCallOutputPointers(ST, Node))
+      continue;
+    ResultStores[ResNo] = ST;
+    StoresInChain = ST->getChain();
+  }
+
+  ArgListTy Args;
+
+  // Pass the arguments.
+  for (const SDValue &Op : Node->op_values()) {
+    EVT ArgVT = Op.getValueType();
+    Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
+    Args.emplace_back(Op, ArgTy);
+  }
+
+  // Pass the output pointers.
+  SmallVector<SDValue, 2> ResultPtrs(NumResults);
+  Type *PointerTy = PointerType::getUnqual(Ctx);
+  for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
+    if (ResNo == CallRetResNo)
+      continue;
+    EVT ResVT = Node->getValueType(ResNo);
+    SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT);
+    ResultPtrs[ResNo] = ResultPtr;
+    Args.emplace_back(ResultPtr, PointerTy);
+  }
+
+  SDLoc DL(Node);
+
+  if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) {
+    // Pass the vector mask (if required).
+    EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
+    SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT);
+    Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
+  }
+
+  Type *RetType = CallRetResNo.has_value()
+                      ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
+                      : Type::getVoidTy(Ctx);
+  SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
+  SDValue Callee = DAG.getExternalSymbol(getLibcallImplName(LibcallImpl).data(),
+                                         getPointerTy(DAG.getDataLayout()));
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
+      getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args));
+
+  auto [Call, CallChain] = LowerCallTo(CLI);
+
+  for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
+    if (ResNo == CallRetResNo) {
+      Results.push_back(Call);
+      continue;
+    }
+    // Work out what memory the result lives in before creating the load, so
+    // that the load actually carries this pointer info.
+    StoreSDNode *ST = ResultStores[ResNo];
+    MachinePointerInfo PtrInfo =
+        ST ? ST->getPointerInfo()
+           : MachinePointerInfo::getFixedStack(
+                 DAG.getMachineFunction(),
+                 cast<FrameIndexSDNode>(ResultPtr)->getIndex());
+    SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain,
+                                     ResultPtr, PtrInfo);
+
+    // Replace store with the library call.
+    if (ST)
+      DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), LoadResult.getValue(1));
+
+    Results.push_back(LoadResult);
+  }
+
+  return true;
+}
+
 bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                            SDValue &LHS, SDValue &RHS,
                                            SDValue &CC, SDValue Mask,