diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index 01359894b0421..ab14ed44fed52 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -186,6 +186,13 @@ struct RuntimeLibcallsInfo { return RTLIB::Unsupported; } + /// \returns the function type and attributes for the \p LibcallImpl, + /// depending on the target \p TT. If the function has incomplete type + /// information, return nullptr for the function type. + std::pair + getFunctionTy(LLVMContext &Ctx, const Triple &TT, const DataLayout &DL, + RTLIB::LibcallImpl LibcallImpl) const; + private: LLVM_ABI static iota_range lookupLibcallImplNameImpl(StringRef Name); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 431a81002074f..316aacdf6978e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -163,6 +163,8 @@ class SelectionDAGLegalize { RTLIB::Libcall CallI128); void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl &Results); + SDValue ExpandSincosStretLibCall(SDNode *Node) const; + SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl); SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, @@ -2423,6 +2425,101 @@ static bool useSinCos(SDNode *Node) { return false; } +SDValue SelectionDAGLegalize::ExpandSincosStretLibCall(SDNode *Node) const { + // For iOS, we want to call an alternative entry point: __sincos_stret, + // which returns the values in two S / D registers. + SDLoc dl(Node); + SDValue Arg = Node->getOperand(0); + EVT ArgVT = Arg.getValueType(); + RTLIB::Libcall LC = RTLIB::getSINCOS_STRET(ArgVT); + RTLIB::LibcallImpl SincosStret = TLI.getLibcallImpl(LC); + if (SincosStret == RTLIB::Unsupported) + return SDValue(); + + /// There are 3 different ABI cases to handle: + /// - Direct return of separate fields in registers + /// - Single return as vector elements + /// - sret struct + + const RTLIB::RuntimeLibcallsInfo &CallsInfo = TLI.getRuntimeLibcallsInfo(); + + const DataLayout &DL = DAG.getDataLayout(); + + auto [FuncTy, FuncAttrs] = CallsInfo.getFunctionTy( + *DAG.getContext(), TM.getTargetTriple(), DL, SincosStret); + + Type *SincosStretRetTy = FuncTy->getReturnType(); + CallingConv::ID CallConv = CallsInfo.getLibcallImplCallingConv(SincosStret); + StringRef LibcallImplName = CallsInfo.getLibcallImplName(SincosStret); + + SDValue Callee = DAG.getExternalSymbol(LibcallImplName.data(), + TLI.getProgramPointerTy(DL)); + + TargetLowering::ArgListTy Args; + SDValue SRet; + + int FrameIdx; + if (FuncTy->getParamType(0)->isPointerTy()) { + // Uses sret + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + + AttributeSet PtrAttrs = FuncAttrs.getParamAttrs(0); + Type *StructTy = PtrAttrs.getStructRetType(); + const uint64_t ByteSize = DL.getTypeAllocSize(StructTy); + const Align StackAlign = DL.getPrefTypeAlign(StructTy); + + FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); + SRet = DAG.getFrameIndex(FrameIdx, TLI.getFrameIndexTy(DL)); + + TargetLowering::ArgListEntry Entry(SRet, FuncTy->getParamType(0)); + Entry.IsSRet = true; + Entry.IndirectType = StructTy; + Entry.Alignment = StackAlign; + + Args.push_back(Entry); + Args.emplace_back(Arg, FuncTy->getParamType(1)); + } else { + Args.emplace_back(Arg, FuncTy->getParamType(0)); + } + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(DAG.getEntryNode()) + .setLibCallee(CallConv, SincosStretRetTy, Callee, std::move(Args)) + .setIsPostTypeLegalization(); + + std::pair CallResult = TLI.LowerCallTo(CLI); + + if (SRet) { + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); + SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, PtrInfo); + + TypeSize StoreSize = ArgVT.getStoreSize(); + + // Address of cos field. + SDValue Add = DAG.getObjectPtrOffset(dl, SRet, StoreSize); + SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, + PtrInfo.getWithOffset(StoreSize)); + + SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); + return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, LoadSin.getValue(0), + LoadCos.getValue(0)); + } + + if (!CallResult.first.getValueType().isVector()) + return CallResult.first; + + SDValue SinVal = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, CallResult.first, + DAG.getVectorIdxConstant(0, dl)); + SDValue CosVal = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, CallResult.first, + DAG.getVectorIdxConstant(1, dl)); + SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); + return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal); +} + SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const { SDLoc dl(Node); EVT VT = Node->getValueType(0); @@ -4730,6 +4827,18 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { case ISD::FSINCOS: case ISD::FSINCOSPI: { EVT VT = Node->getValueType(0); + + if (Node->getOpcode() == ISD::FSINCOS) { + RTLIB::Libcall SincosStret = RTLIB::getSINCOS_STRET(VT); + if (SincosStret != RTLIB::UNKNOWN_LIBCALL) { + if (SDValue Expanded = ExpandSincosStretLibCall(Node)) { + Results.push_back(Expanded); + Results.push_back(Expanded.getValue(1)); + break; + } + } + } + RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS ? RTLIB::getSINCOS(VT) : RTLIB::getSINCOSPI(VT); diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index 77af29b9d70f6..2ce5719228a0d 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -7,7 +7,9 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/RuntimeLibcalls.h" +#include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/StringTable.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/xxhash.h" #include "llvm/TargetParser/ARMTargetParser.h" @@ -72,3 +74,80 @@ bool RuntimeLibcallsInfo::darwinHasExp10(const Triple &TT) { return false; } } + +std::pair +RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT, + const DataLayout &DL, + RTLIB::LibcallImpl LibcallImpl) const { + static constexpr Attribute::AttrKind CommonFnAttrs[] = { + Attribute::NoCallback, Attribute::NoFree, Attribute::NoSync, + Attribute::NoUnwind, Attribute::WillReturn}; + + switch (LibcallImpl) { + case RTLIB::impl___sincos_stret: + case RTLIB::impl___sincosf_stret: { + if (!darwinHasSinCosStret(TT)) // Non-darwin currently unexpected + return {}; + + Type *ScalarTy = LibcallImpl == RTLIB::impl___sincosf_stret + ? Type::getFloatTy(Ctx) + : Type::getDoubleTy(Ctx); + + AttrBuilder FuncAttrBuilder(Ctx); + for (Attribute::AttrKind Attr : CommonFnAttrs) + FuncAttrBuilder.addAttribute(Attr); + + const bool UseSret = + TT.isX86_32() || ((TT.isARM() || TT.isThumb()) && + ARM::computeTargetABI(TT) == ARM::ARM_ABI_APCS); + + FuncAttrBuilder.addMemoryAttr(MemoryEffects::argumentOrErrnoMemOnly( + UseSret ? ModRefInfo::Mod : ModRefInfo::NoModRef, ModRefInfo::Mod)); + + AttributeList Attrs; + Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder); + + if (UseSret) { + AttrBuilder AttrBuilder(Ctx); + StructType *StructTy = StructType::get(ScalarTy, ScalarTy); + AttrBuilder.addStructRetAttr(StructTy); + AttrBuilder.addAlignmentAttr(DL.getABITypeAlign(StructTy)); + FunctionType *FuncTy = FunctionType::get( + Type::getVoidTy(Ctx), {DL.getAllocaPtrType(Ctx), ScalarTy}, false); + + return {FuncTy, Attrs.addParamAttributes(Ctx, 0, AttrBuilder)}; + } + + Type *RetTy = + LibcallImpl == RTLIB::impl___sincosf_stret && TT.isX86_64() + ? static_cast(FixedVectorType::get(ScalarTy, 2)) + : static_cast(StructType::get(ScalarTy, ScalarTy)); + + return {FunctionType::get(RetTy, {ScalarTy}, false), Attrs}; + } + case RTLIB::impl_sqrtf: + case RTLIB::impl_sqrt: { + AttrBuilder FuncAttrBuilder(Ctx); + + for (Attribute::AttrKind Attr : CommonFnAttrs) + FuncAttrBuilder.addAttribute(Attr); + FuncAttrBuilder.addMemoryAttr(MemoryEffects::errnoMemOnly(ModRefInfo::Mod)); + + AttributeList Attrs; + Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder); + + Type *ScalarTy = LibcallImpl == RTLIB::impl_sqrtf ? Type::getFloatTy(Ctx) + : Type::getDoubleTy(Ctx); + FunctionType *FuncTy = FunctionType::get(ScalarTy, {ScalarTy}, false); + + Attrs = Attrs.addRetAttribute( + Ctx, Attribute::getWithNoFPClass(Ctx, fcNegInf | fcNegSubnormal | + fcNegNormal)); + return {FuncTy, Attrs}; + } + default: + return {}; + } + + return {}; +} diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 60aa61e993b26..d08f9b94227a2 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1052,15 +1052,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // Lower READCYCLECOUNTER using an mrs from CNTVCT_EL0. setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); - if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && - getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { - // Issue __sincos_stret if available. - setOperationAction(ISD::FSINCOS, MVT::f64, Custom); - setOperationAction(ISD::FSINCOS, MVT::f32, Custom); - } else { - setOperationAction(ISD::FSINCOS, MVT::f64, Expand); - setOperationAction(ISD::FSINCOS, MVT::f32, Expand); - } + // Issue __sincos_stret if available. + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); // Make floating-point constants legal for the large code model, so they don't // become loads from the constant pool. @@ -5346,35 +5340,6 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, return SDValue(); } -SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op, - SelectionDAG &DAG) const { - // For iOS, we want to call an alternative entry point: __sincos_stret, - // which returns the values in two S / D registers. - SDLoc DL(Op); - SDValue Arg = Op.getOperand(0); - EVT ArgVT = Arg.getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - - ArgListTy Args; - Args.emplace_back(Arg, ArgTy); - - RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64 - : RTLIB::SINCOS_STRET_F32; - const char *LibcallName = getLibcallName(LC); - SDValue Callee = - DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout())); - - StructType *RetTy = StructType::get(ArgTy, ArgTy); - TargetLowering::CallLoweringInfo CLI(DAG); - CallingConv::ID CC = getLibcallCallingConv(LC); - CLI.setDebugLoc(DL) - .setChain(DAG.getEntryNode()) - .setLibCallee(CC, RetTy, Callee, std::move(Args)); - - std::pair CallResult = LowerCallTo(CLI); - return CallResult.first; -} - static MVT getSVEContainerType(EVT ContentTy); SDValue @@ -7723,8 +7688,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG); - case ISD::FSINCOS: - return LowerFSINCOS(Op, DAG); case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG); case ISD::SET_ROUNDING: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 2cb8ed29f252a..70bfae717fb76 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -745,7 +745,6 @@ class AArch64TargetLowering : public TargetLowering { SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOOP_DEPENDENCE_MASK(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 6b0653457cbaf..92fae71121a81 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1312,8 +1312,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_, setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom); } - setOperationAction(ISD::FSINCOS, MVT::f64, Custom); - setOperationAction(ISD::FSINCOS, MVT::f32, Custom); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); // FP-ARMv8 implements a lot of rounding-like FP operations. if (Subtarget->hasFPARMv8Base()) { @@ -9855,76 +9855,6 @@ static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry); } -SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { - // For iOS, we want to call an alternative entry point: __sincos_stret, - // return values are passed via sret. - SDLoc dl(Op); - SDValue Arg = Op.getOperand(0); - EVT ArgVT = Arg.getValueType(); - RTLIB::Libcall LC = RTLIB::getSINCOS_STRET(ArgVT); - RTLIB::LibcallImpl SincosStret = getLibcallImpl(LC); - if (SincosStret == RTLIB::Unsupported) - return SDValue(); - - assert(Subtarget->isTargetDarwin()); - - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - auto PtrVT = getPointerTy(DAG.getDataLayout()); - - MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); - - // Pair of floats / doubles used to pass the result. - Type *RetTy = StructType::get(ArgTy, ArgTy); - auto &DL = DAG.getDataLayout(); - - ArgListTy Args; - bool ShouldUseSRet = getTM().isAPCS_ABI(); - SDValue SRet; - if (ShouldUseSRet) { - // Create stack object for sret. - const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); - const Align StackAlign = DL.getPrefTypeAlign(RetTy); - int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); - SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL)); - - ArgListEntry Entry(SRet, PointerType::getUnqual(RetTy->getContext())); - Entry.IsSExt = false; - Entry.IsZExt = false; - Entry.IsSRet = true; - Args.push_back(Entry); - RetTy = Type::getVoidTy(*DAG.getContext()); - } - - Args.emplace_back(Arg, ArgTy); - - StringRef LibcallName = getLibcallImplName(SincosStret); - CallingConv::ID CC = getLibcallImplCallingConv(SincosStret); - SDValue Callee = DAG.getExternalSymbol(LibcallName.data(), getPointerTy(DL)); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl) - .setChain(DAG.getEntryNode()) - .setCallee(CC, RetTy, Callee, std::move(Args)) - .setDiscardResult(ShouldUseSRet); - std::pair CallResult = LowerCallTo(CLI); - - if (!ShouldUseSRet) - return CallResult.first; - - SDValue LoadSin = - DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo()); - - // Address of cos field. - SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet, - DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl)); - SDValue LoadCos = - DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo()); - - SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); - return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, - LoadSin.getValue(0), LoadCos.getValue(0)); -} - SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, bool Signed, SDValue &Chain) const { @@ -10726,8 +10656,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VECREDUCE_SMAX: return LowerVecReduceMinMax(Op, DAG, Subtarget); case ISD::ATOMIC_LOAD: - case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); - case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); + case ISD::ATOMIC_STORE: + return LowerAtomicLoadStore(Op, DAG); case ISD::SDIVREM: case ISD::UDIVREM: return LowerDivRem(Op, DAG); case ISD::DYNAMIC_STACKALLOC: diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index bf3438b0d8803..bc2fec3c1bdb5 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -901,7 +901,6 @@ class VectorType; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const; void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b97b5089cb0a3..aa4f0a7eb909b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2572,8 +2572,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } // Combine sin / cos into _sincos_stret if it is available. - setOperationAction(ISD::FSINCOS, MVT::f64, Custom); - setOperationAction(ISD::FSINCOS, MVT::f32, Custom); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); if (Subtarget.isTargetWin64()) { setOperationAction(ISD::SDIV, MVT::i128, Custom); @@ -33004,61 +33004,6 @@ static SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC); } -static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget, - SelectionDAG &DAG) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue Arg = Op.getOperand(0); - EVT ArgVT = Arg.getValueType(); - bool isF64 = ArgVT == MVT::f64; - - RTLIB::Libcall LC = isF64 ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32; - const char *LibcallName = TLI.getLibcallName(LC); - if (!LibcallName) - return SDValue(); - - assert(Subtarget.isTargetDarwin() && Subtarget.is64Bit()); - - // For MacOSX, we want to call an alternative entry point: __sincos_stret, - // which returns the values as { float, float } (in XMM0) or - // { double, double } (which is returned in XMM0, XMM1). - SDLoc dl(Op); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - - TargetLowering::ArgListTy Args; - Args.emplace_back(Arg, ArgTy); - - // Only optimize x86_64 for now. i386 is a bit messy. For f32, - // the small struct {f32, f32} is returned in (eax, edx). For f64, - // the results are returned via SRet in memory. - SDValue Callee = - DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout())); - - Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy) - : (Type *)FixedVectorType::get(ArgTy, 2); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl) - .setChain(DAG.getEntryNode()) - .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args)) - .setIsPostTypeLegalization(); - - std::pair CallResult = TLI.LowerCallTo(CLI); - - if (isF64) - // Returned in xmm0 and xmm1. - return CallResult.first; - - // Returned in bits 0:31 and 32:64 xmm0. - SDValue SinVal = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, CallResult.first, - DAG.getVectorIdxConstant(0, dl)); - SDValue CosVal = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, CallResult.first, - DAG.getVectorIdxConstant(1, dl)); - SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); - return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal); -} - /// Widen a vector input to a vector of NVT. The /// input vector must have the same element type as NVT. static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG, @@ -33663,7 +33608,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ABDS: case ISD::ABDU: return LowerABD(Op, Subtarget, DAG); case ISD::AVGCEILU: return LowerAVG(Op, Subtarget, DAG); - case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG); case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG); case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG); case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG); diff --git a/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp b/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp index 0642d51cd2c21..6d4436b92c119 100644 --- a/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp +++ b/llvm/lib/Transforms/Utils/DeclareRuntimeLibcalls.cpp @@ -16,22 +16,62 @@ using namespace llvm; +static void mergeAttributes(LLVMContext &Ctx, const Module &M, + const DataLayout &DL, const Triple &TT, + Function *Func, FunctionType *FuncTy, + AttributeList FuncAttrs) { + AttributeList OldAttrs = Func->getAttributes(); + AttributeList NewAttrs = OldAttrs; + + { + AttrBuilder OldBuilder(Ctx, OldAttrs.getFnAttrs()); + AttrBuilder NewBuilder(Ctx, FuncAttrs.getFnAttrs()); + OldBuilder.merge(NewBuilder); + NewAttrs = NewAttrs.addFnAttributes(Ctx, OldBuilder); + } + + { + AttrBuilder OldBuilder(Ctx, OldAttrs.getRetAttrs()); + AttrBuilder NewBuilder(Ctx, FuncAttrs.getRetAttrs()); + OldBuilder.merge(NewBuilder); + NewAttrs = NewAttrs.addRetAttributes(Ctx, OldBuilder); + } + + for (unsigned I = 0, E = FuncTy->getNumParams(); I != E; ++I) { + AttrBuilder OldBuilder(Ctx, OldAttrs.getParamAttrs(I)); + AttrBuilder NewBuilder(Ctx, FuncAttrs.getParamAttrs(I)); + OldBuilder.merge(NewBuilder); + NewAttrs = NewAttrs.addParamAttributes(Ctx, I, OldBuilder); + } + + Func->setAttributes(NewAttrs); +} + PreservedAnalyses DeclareRuntimeLibcallsPass::run(Module &M, ModuleAnalysisManager &MAM) { RTLIB::RuntimeLibcallsInfo RTLCI(M.getTargetTriple()); LLVMContext &Ctx = M.getContext(); + const DataLayout &DL = M.getDataLayout(); + const Triple &TT = M.getTargetTriple(); for (RTLIB::LibcallImpl Impl : RTLCI.getLibcallImpls()) { if (Impl == RTLIB::Unsupported) continue; - // TODO: Declare with correct type, calling convention, and attributes. + auto [FuncTy, FuncAttrs] = RTLCI.getFunctionTy(Ctx, TT, DL, Impl); - FunctionType *FuncTy = - FunctionType::get(Type::getVoidTy(Ctx), {}, /*IsVarArgs=*/true); + // TODO: Declare with correct type, calling convention, and attributes. + if (!FuncTy) + FuncTy = FunctionType::get(Type::getVoidTy(Ctx), {}, /*IsVarArgs=*/true); StringRef FuncName = RTLCI.getLibcallImplName(Impl); - M.getOrInsertFunction(FuncName, FuncTy); + + Function *Func = + cast(M.getOrInsertFunction(FuncName, FuncTy).getCallee()); + if (Func->getFunctionType() == FuncTy) { + mergeAttributes(Ctx, M, DL, TT, Func, FuncTy, FuncAttrs); + Func->setCallingConv(RTLCI.getLibcallImplCallingConv(Impl)); + } } return PreservedAnalyses::none(); diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll index ee3a0539bf300..c005316f07f06 100644 --- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/basic.ll @@ -11,5 +11,9 @@ define float @sinf(float %x) { } ; CHECK: declare void @acosf(...) + +; CHECK: declare nofpclass(ninf nsub nnorm) float @sqrtf(float) [[SQRT_ATTRS:#[0-9]+]] +; CHECK: declare nofpclass(ninf nsub nnorm) double @sqrt(double) [[SQRT_ATTRS:#[0-9]+]] + ; CHECK: declare void @__umodti3(...) diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/merge_attributes.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/merge_attributes.ll new file mode 100644 index 0000000000000..ffbf11d4106dc --- /dev/null +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/merge_attributes.ll @@ -0,0 +1,11 @@ +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define noundef nofpclass(nan) float @sqrtf(float %x) "foo" { + %ret = call float asm "; $0 = sqrt($1)", "=r,r"(float %x) + ret float %ret +} + +; FIXME: Individual fields of nofpclass not merged +; CHECK: define noundef nofpclass(ninf nsub nnorm) float @sqrtf(float %x) [[SQRT_ATTR:#[0-9]+]] { + +; CHECK: attributes [[SQRT_ATTR]] = { nocallback nofree nosync nounwind willreturn memory(errnomem: write) "foo" } diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sincos_stret.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sincos_stret.ll new file mode 100644 index 0000000000000..0d0e3da25eea7 --- /dev/null +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sincos_stret.ll @@ -0,0 +1,23 @@ +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=x86_64-apple-macos10.9 < %s | FileCheck -check-prefixes=CHECK,X64 %s +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=arm64-apple-macos10.9 < %s | FileCheck -check-prefixes=CHECK,STRUCT %s +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=thumbv7k-apple-watchos2.0 < %s | FileCheck -check-prefixes=CHECK,STRUCT %s +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=armv7-apple-ios7 < %s | FileCheck -check-prefix=SRET %s +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=thumbv7-apple-ios7 < %s | FileCheck -check-prefix=SRET %s + +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=armv7-apple-ios6 < %s | FileCheck -check-prefix=NONE %s +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=x86_64-apple-macos10.8 < %s | FileCheck -check-prefix=NONE %s + +; X64: declare <2 x float> @__sincosf_stret(float) [[SINCOS_ATTRS:#[0-9]+]] +; X64: declare { double, double } @__sincos_stret(double) [[SINCOS_ATTRS:#[0-9]+]] + +; STRUCT: declare { float, float } @__sincosf_stret(float) [[SINCOS_ATTRS:#[0-9]+]] +; STRUCT: declare { double, double } @__sincos_stret(double) [[SINCOS_ATTRS:#[0-9]+]] + +; SRET: declare void @__sincosf_stret(ptr sret({ float, float }) align 4, float) [[SINCOS_ATTRS:#[0-9]+]] +; SRET: declare void @__sincos_stret(ptr sret({ double, double }) align 4, double) [[SINCOS_ATTRS:#[0-9]+]] + +; CHECK: attributes [[SINCOS_ATTRS]] = { nocallback nofree nosync nounwind willreturn memory(errnomem: write) } +; SRET: attributes [[SINCOS_ATTRS]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write, errnomem: write) } + +; NONE-NOT: __sincos_stret +; NONE-NOT: __sincosf_stret diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/wrong_declaration.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/wrong_declaration.ll new file mode 100644 index 0000000000000..2451010df5b75 --- /dev/null +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/wrong_declaration.ll @@ -0,0 +1,21 @@ +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=x86_64-apple-macos10.9 < %s | FileCheck %s + +; Make sure there is no crash if there are definitions or declarations +; with the wrong type signature. + +; CHECK: define void @sqrtf() { +define void @sqrtf() { + ret void +} + +; CHECK: define float @sqrt(float %0) { +define float @sqrt(float) { + ret float 0.0 +} + +; CHECK: declare double @__sincos_stret(double) +declare double @__sincos_stret(double) + +; CHECK: declare { float, float } @__sincosf_stret(float) +declare { float, float } @__sincosf_stret(float) +