diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 1c167af4b0478..a52ad41d0f1b3 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -334,7 +334,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { break; case Intrinsic::sincos: - LC = RTLIB::getSINCOS(ScalarVT); + LC = RTLIB::getSINCOS(VT); + if (LC == RTLIB::UNKNOWN_LIBCALL) + LC = RTLIB::getSINCOS(ScalarVT); + else if (VT.isVector()) + IsVectorCall = true; + break; default: return std::nullopt; diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td index 228629a7c86f0..ee80606ed0dbf 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.td +++ b/llvm/include/llvm/IR/RuntimeLibcalls.td @@ -183,6 +183,7 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in { } foreach VecTy = ["V4F32", "V2F64", "NXV4F32", "NXV2F64"] in { + def SINCOS_#VecTy : RuntimeLibcall; def SINCOSPI_#VecTy : RuntimeLibcall; } @@ -1092,6 +1093,11 @@ def __security_check_cookie_arm64ec : RuntimeLibcallImpl SleefLibcalls = { + def _ZGVnN2vl8l8_sincos : RuntimeLibcallImpl; + def _ZGVnN4vl4l4_sincosf : RuntimeLibcallImpl; + def _ZGVsNxvl8l8_sincos : RuntimeLibcallImpl; + def _ZGVsNxvl4l4_sincosf : RuntimeLibcallImpl; + def _ZGVnN4vl4l4_sincospif : RuntimeLibcallImpl; def _ZGVnN2vl8l8_sincospi : RuntimeLibcallImpl; def _ZGVsNxvl4l4_sincospif : RuntimeLibcallImpl; @@ -1103,6 +1109,13 @@ defset list SleefLibcalls = { //===----------------------------------------------------------------------===// defset list ARMPLLibcalls = { + def armpl_vsincosq_f64 + : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall + def armpl_vsincosq_f32 + : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall + def armpl_svsincos_f64_x : RuntimeLibcallImpl; + def armpl_svsincos_f32_x : RuntimeLibcallImpl; + def armpl_vsincospiq_f32 : RuntimeLibcallImpl; def armpl_vsincospiq_f64 : RuntimeLibcallImpl; def armpl_svsincospi_f32_x : RuntimeLibcallImpl; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index f5a54497c8a98..78d8ea0676dd7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1268,10 +1268,12 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { return; break; - + case ISD::FSINCOS: case ISD::FSINCOSPI: { EVT VT = Node->getValueType(0); - RTLIB::Libcall LC = RTLIB::getSINCOSPI(VT); + RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS + ? RTLIB::getSINCOS(VT) + : RTLIB::getSINCOSPI(VT); if (LC != RTLIB::UNKNOWN_LIBCALL && DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT)) return; @@ -1280,14 +1282,6 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { // scalarizing. break; } - case ISD::FSINCOS: { - // FIXME: Try to directly match vector case like fsincospi - EVT VT = Node->getValueType(0).getVectorElementType(); - RTLIB::Libcall LC = RTLIB::getSINCOS(VT); - if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT)) - return; - break; - } case ISD::FMODF: { EVT VT = Node->getValueType(0).getVectorElementType(); RTLIB::Libcall LC = RTLIB::getMODF(VT); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 814b4b57a0b9b..b4eb6c357e10e 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -425,6 +425,24 @@ RTLIB::Libcall RTLIB::getCOS(EVT RetVT) { } RTLIB::Libcall RTLIB::getSINCOS(EVT RetVT) { + // TODO: Tablegen should generate this function + if (RetVT.isVector()) { + if (!RetVT.isSimple()) + return RTLIB::UNKNOWN_LIBCALL; + switch (RetVT.getSimpleVT().SimpleTy) { + case MVT::v4f32: + return RTLIB::SINCOS_V4F32; + case MVT::v2f64: + return RTLIB::SINCOS_V2F64; + case MVT::nxv4f32: + return RTLIB::SINCOS_NXV4F32; + case MVT::nxv2f64: + return RTLIB::SINCOS_NXV2F64; + default: + return RTLIB::UNKNOWN_LIBCALL; + } + } + return getFPLibCall(RetVT, SINCOS_F32, SINCOS_F64, SINCOS_F80, SINCOS_F128, SINCOS_PPCF128); } diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index 795621701d910..e66b9adb43ac4 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -43,17 +43,26 @@ RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Triple &TT, switch (ClVectorLibrary) { case VectorLibrary::SLEEFGNUABI: for (RTLIB::LibcallImpl Impl : - {RTLIB::impl__ZGVnN4vl4l4_sincospif, RTLIB::impl__ZGVnN2vl8l8_sincospi, + {RTLIB::impl__ZGVnN2vl8l8_sincos, RTLIB::impl__ZGVnN4vl4l4_sincosf, + RTLIB::impl__ZGVsNxvl8l8_sincos, RTLIB::impl__ZGVsNxvl4l4_sincosf, + RTLIB::impl__ZGVnN4vl4l4_sincospif, RTLIB::impl__ZGVnN2vl8l8_sincospi, RTLIB::impl__ZGVsNxvl4l4_sincospif, RTLIB::impl__ZGVsNxvl8l8_sincospi}) setAvailable(Impl); break; case VectorLibrary::ArmPL: for (RTLIB::LibcallImpl Impl : - {RTLIB::impl_armpl_vsincospiq_f32, RTLIB::impl_armpl_vsincospiq_f64, + {RTLIB::impl_armpl_vsincosq_f64, RTLIB::impl_armpl_vsincosq_f32, + RTLIB::impl_armpl_svsincos_f64_x, RTLIB::impl_armpl_svsincos_f32_x, + RTLIB::impl_armpl_vsincospiq_f32, RTLIB::impl_armpl_vsincospiq_f64, RTLIB::impl_armpl_svsincospi_f32_x, RTLIB::impl_armpl_svsincospi_f64_x}) setAvailable(Impl); + + for (RTLIB::LibcallImpl Impl : + {RTLIB::impl_armpl_vsincosq_f64, RTLIB::impl_armpl_vsincosq_f32}) + setLibcallImplCallingConv(Impl, CallingConv::AArch64_VectorCall); + break; default: break; @@ -188,6 +197,14 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT, fcNegNormal)); return {FuncTy, Attrs}; } + case RTLIB::impl__ZGVnN2vl8l8_sincos: + case RTLIB::impl__ZGVnN4vl4l4_sincosf: + case RTLIB::impl__ZGVsNxvl8l8_sincos: + case RTLIB::impl__ZGVsNxvl4l4_sincosf: + case RTLIB::impl_armpl_vsincosq_f64: + case RTLIB::impl_armpl_vsincosq_f32: + case RTLIB::impl_armpl_svsincos_f64_x: + case RTLIB::impl_armpl_svsincos_f32_x: case RTLIB::impl__ZGVnN4vl4l4_sincospif: case RTLIB::impl__ZGVnN2vl8l8_sincospi: case RTLIB::impl__ZGVsNxvl4l4_sincospif: @@ -201,11 +218,20 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT, bool IsF32 = LibcallImpl == RTLIB::impl__ZGVnN4vl4l4_sincospif || LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif || LibcallImpl == RTLIB::impl_armpl_vsincospiq_f32 || - LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x; + LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x || + LibcallImpl == RTLIB::impl__ZGVnN4vl4l4_sincosf || + LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincosf || + LibcallImpl == RTLIB::impl_armpl_vsincosq_f32 || + LibcallImpl == RTLIB::impl_armpl_svsincos_f32_x; + Type *ScalarTy = IsF32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx); unsigned EC = IsF32 ? 4 : 2; - bool IsScalable = LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif || + bool IsScalable = LibcallImpl == RTLIB::impl__ZGVsNxvl8l8_sincos || + LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincosf || + LibcallImpl == RTLIB::impl_armpl_svsincos_f32_x || + LibcallImpl == RTLIB::impl_armpl_svsincos_f64_x || + LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif || LibcallImpl == RTLIB::impl__ZGVsNxvl8l8_sincospi || LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x || LibcallImpl == RTLIB::impl_armpl_svsincospi_f64_x; @@ -245,6 +271,8 @@ bool RuntimeLibcallsInfo::hasVectorMaskArgument(RTLIB::LibcallImpl Impl) { /// FIXME: This should be generated by tablegen and support the argument at an /// arbitrary position switch (Impl) { + case RTLIB::impl_armpl_svsincos_f32_x: + case RTLIB::impl_armpl_svsincos_f64_x: case RTLIB::impl_armpl_svsincospi_f32_x: case RTLIB::impl_armpl_svsincospi_f64_x: return true; diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll index c45f319f80122..1d9cf6a5d77fe 100644 --- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll @@ -1,12 +1,21 @@ ; REQUIRES: aarch64-registered-target ; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=aarch64-unknown-linux -mattr=+neon,+sve -vector-library=ArmPL < %s | FileCheck %s -; CHECK: declare void @armpl_svsincospi_f32_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS:#[0-9]+]] +; CHECK: declare void @armpl_svsincos_f32_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS:#[0-9]+]] -; CHECK: declare void @armpl_svsincospi_f64_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS:#[0-9]+]] +; CHECK: declare void @armpl_svsincos_f64_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS]] + +; CHECK: declare void @armpl_svsincospi_f32_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS]] + +; CHECK: declare void @armpl_svsincospi_f64_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS]] ; CHECK: declare void @armpl_vsincospiq_f32(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @armpl_vsincospiq_f64(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] +; CHECK: declare aarch64_vector_pcs void @armpl_vsincosq_f32(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare aarch64_vector_pcs void @armpl_vsincosq_f64(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + + ; CHECK: attributes [[ATTRS]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll index 7972e0ca1c487..2c6900761b1c0 100644 --- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll @@ -1,12 +1,20 @@ ; REQUIRES: aarch64-registered-target ; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=aarch64-unknown-linux -mattr=+neon,+sve -vector-library=sleefgnuabi < %s | FileCheck %s -; CHECK: declare void @_ZGVnN2vl8l8_sincospi(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] +; CHECK: declare void @_ZGVnN2vl8l8_sincos(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] + +; CHECK: declare void @_ZGVnN2vl8l8_sincospi(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @_ZGVnN4vl4l4_sincosf(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @_ZGVnN4vl4l4_sincospif(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] -; CHECK: declare void @_ZGVsNxvl4l4_sincospif(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] +; CHECK: declare void @_ZGVsNxvl4l4_sincosf(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @_ZGVsNxvl4l4_sincospif(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @_ZGVsNxvl8l8_sincos(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] -; CHECK: declare void @_ZGVsNxvl8l8_sincospi(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] +; CHECK: declare void @_ZGVsNxvl8l8_sincospi(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: attributes [[ATTRS]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }