-
Notifications
You must be signed in to change notification settings - Fork 15.2k
RuntimeLibcalls: Remove incorrect sincospi from most targets #166982
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
RuntimeLibcalls: Remove incorrect sincospi from most targets #166982
Conversation
|
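@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-arm
Author: Matt Arsenault (arsenm)
Changes
sincospi/sincospif/sincospil does not appear to exist on common targets. Darwin targets have __sincospi and __sincospif, so define and use those implementations. I have no idea what version added those calls, so I'm just guessing it's the same conditions as __sincos_stret.
Most of this patch is working to preserve codegen when a vector library is explicitly enabled. This only covers sleef and armpl, as those are the only cases tested.
The multiple result libcalls have an aberrant process where the legalizer looks for the scalar type's libcall in RuntimeLibcalls, and then cross references TargetLibraryInfo to find a matching vector call. This was unworkable in the sincospi case, since the common case is there is no scalar call available. To preserve codegen if the call is available, first try to match a libcall with the vector type before falling back on the old scalar search.
Eventually all of this logic should be contained in RuntimeLibcalls, without the link to TargetLibraryInfo. In principle we should perform the same legalization logic as for an ordinary operation, trying to find a matching subvector type with a libcall.
Patch is 56.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166982.diff
15 Files Affected: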
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index f58525754d7a5..1c167af4b0478 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -316,12 +316,22 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
EVT ScalarVT = VT.getScalarType();
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ /// Migration flag. IsVectorCall cases directly know about the vector
+ /// libcall in RuntimeLibcallsInfo and shouldn't try to use
+ /// LibInfo->getVectorMappingInfo.
+ bool IsVectorCall = false;
+
switch (ICA.getID()) {
case Intrinsic::modf:
LC = RTLIB::getMODF(ScalarVT);
break;
case Intrinsic::sincospi:
- LC = RTLIB::getSINCOSPI(ScalarVT);
+ LC = RTLIB::getSINCOSPI(VT);
+ if (LC == RTLIB::UNKNOWN_LIBCALL)
+ LC = RTLIB::getSINCOSPI(ScalarVT);
+ else if (VT.isVector())
+ IsVectorCall = true;
+
break;
case Intrinsic::sincos:
LC = RTLIB::getSINCOS(ScalarVT);
@@ -345,17 +355,23 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
LLVMContext &Ctx = RetTy->getContext();
ElementCount VF = getVectorizedTypeVF(RetTy);
VecDesc const *VD = nullptr;
- for (bool Masked : {false, true}) {
- if ((VD = LibInfo->getVectorMappingInfo(LCName, VF, Masked)))
- break;
+
+ if (!IsVectorCall) {
+ for (bool Masked : {false, true}) {
+ if ((VD = LibInfo->getVectorMappingInfo(LCName, VF, Masked)))
+ break;
+ }
+ if (!VD)
+ return std::nullopt;
}
- if (!VD)
- return std::nullopt;
// Cost the call + mask.
auto Cost =
thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(), CostKind);
- if (VD->isMasked()) {
+
+ if ((VD && VD->isMasked()) ||
+ (IsVectorCall &&
+ RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl))) {
auto VecTy = VectorType::get(IntegerType::getInt1Ty(Ctx), VF);
Cost += thisT()->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
VecTy, {}, CostKind, 0, nullptr, {});
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 0dd4f23c6d85f..c8db97cc42a30 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1725,9 +1725,17 @@ class SelectionDAG {
/// value.
LLVM_ABI bool
expandMultipleResultFPLibCall(RTLIB::Libcall LC, SDNode *Node,
- SmallVectorImpl<SDValue> &Results,
+ SmallVectorImpl<SDValue> &Results, EVT CallType,
std::optional<unsigned> CallRetResNo = {});
+ // FIXME: This should be removed, and the form using RTLIB::Libcall should be
+ // preferred. Callers should resolve the exact type libcall to use.
+ LLVM_ABI bool
+ expandMultipleResultFPLibCall(StringRef LibcallName, CallingConv::ID CC,
+ SDNode *Node, SmallVectorImpl<SDValue> &Results,
+ std::optional<unsigned> CallRetResNo = {},
+ bool IsVectorMasked = false);
+
/// Expand the specified \c ISD::VAARG node as the Legalize pass would.
LLVM_ABI SDValue expandVAArg(SDNode *Node);
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td
index 1a752aed31992..a0b52395498c5 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -975,10 +975,6 @@ def frexpf : RuntimeLibcallImpl<FREXP_F32>;
def frexp : RuntimeLibcallImpl<FREXP_F64>;
defm frexpl : LibmLongDoubleLibCall;
-def sincospif : RuntimeLibcallImpl<SINCOSPI_F32>;
-def sincospi : RuntimeLibcallImpl<SINCOSPI_F64>;
-defm sincospil : LibmLongDoubleLibCall;
-
def modff : RuntimeLibcallImpl<MODF_F32>;
def modf : RuntimeLibcallImpl<MODF_F64>;
defm modfl : LibmLongDoubleLibCall;
@@ -1055,6 +1051,15 @@ def sincosf : RuntimeLibcallImpl<SINCOS_F32>;
def sincos : RuntimeLibcallImpl<SINCOS_F64>;
defm sincosl : LibmLongDoubleLibCall;
+// Exists in sun math library
+def sincospif : RuntimeLibcallImpl<SINCOSPI_F32>;
+def sincospi : RuntimeLibcallImpl<SINCOSPI_F64>;
+defm sincospil : LibmLongDoubleLibCall;
+
+// Exists on macOS
+def __sincospif : RuntimeLibcallImpl<SINCOSPI_F32>;
+def __sincospi : RuntimeLibcallImpl<SINCOSPI_F64>;
+
def bzero : RuntimeLibcallImpl<BZERO>;
def __bzero : RuntimeLibcallImpl<BZERO>;
@@ -1232,7 +1237,9 @@ defvar DefaultLibcallImpls32 = (add DefaultRuntimeLibcallImpls);
defvar DefaultLibcallImpls64 = (add DefaultRuntimeLibcallImpls,
Int128RTLibcalls);
-defvar DarwinSinCosStret = LibcallImpls<(add __sincosf_stret, __sincos_stret),
+// TODO: Guessing sincospi added at same time as sincos_stret
+defvar DarwinSinCosStret = LibcallImpls<(add __sincosf_stret, __sincos_stret,
+ __sincospif, __sincospi),
darwinHasSinCosStret>;
defvar DarwinExp10 = LibcallImpls<(add __exp10f, __exp10), darwinHasExp10>;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 316aacdf6978e..a0baf821698a8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4842,9 +4842,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
? RTLIB::getSINCOS(VT)
: RTLIB::getSINCOSPI(VT);
- bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results);
- if (!Expanded)
- llvm_unreachable("Expected scalar FSINCOS[PI] to expand to libcall!");
+ bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT);
+ if (!Expanded) {
+ DAG.getContext()->emitError(Twine("no libcall available for ") +
+ Node->getOperationName(&DAG));
+ SDValue Poison = DAG.getPOISON(VT);
+ Results.push_back(Poison);
+ Results.push_back(Poison);
+ }
+
break;
}
case ISD::FLOG:
@@ -4934,7 +4940,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
EVT VT = Node->getValueType(0);
RTLIB::Libcall LC = Node->getOpcode() == ISD::FMODF ? RTLIB::getMODF(VT)
: RTLIB::getFREXP(VT);
- bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results,
+ bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT,
/*CallRetResNo=*/0);
if (!Expanded)
llvm_unreachable("Expected scalar FFREXP/FMODF to expand to libcall!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 58983cb57d7f6..29c4dac12a81a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1726,7 +1726,8 @@ void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults(
SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) {
assert(!N->isStrictFPOpcode() && "strictfp not implemented");
SmallVector<SDValue> Results;
- DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo);
+ DAG.expandMultipleResultFPLibCall(LC, N, Results, N->getValueType(0),
+ CallRetResNo);
for (auto [ResNo, Res] : enumerate(Results)) {
SDValue Lo, Hi;
GetPairElements(Res, Lo, Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 94751be5b7986..f5a54497c8a98 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1268,20 +1268,30 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
break;
- case ISD::FSINCOS:
+
case ISD::FSINCOSPI: {
+ EVT VT = Node->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getSINCOSPI(VT);
+ if (LC != RTLIB::UNKNOWN_LIBCALL &&
+ DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT))
+ return;
+
+ // TODO: Try to see if there's a narrower call available to use before
+ // scalarizing.
+ break;
+ }
+ case ISD::FSINCOS: {
+ // FIXME: Try to directly match vector case like fsincospi
EVT VT = Node->getValueType(0).getVectorElementType();
- RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
- ? RTLIB::getSINCOS(VT)
- : RTLIB::getSINCOSPI(VT);
- if (DAG.expandMultipleResultFPLibCall(LC, Node, Results))
+ RTLIB::Libcall LC = RTLIB::getSINCOS(VT);
+ if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT))
return;
break;
}
case ISD::FMODF: {
- RTLIB::Libcall LC =
- RTLIB::getMODF(Node->getValueType(0).getVectorElementType());
- if (DAG.expandMultipleResultFPLibCall(LC, Node, Results,
+ EVT VT = Node->getValueType(0).getVectorElementType();
+ RTLIB::Libcall LC = RTLIB::getMODF(VT);
+ if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT,
/*CallRetResNo=*/0))
return;
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index bbc1d734cfef5..a69216e1a0e7f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2515,18 +2515,20 @@ static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode,
bool SelectionDAG::expandMultipleResultFPLibCall(
RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl<SDValue> &Results,
- std::optional<unsigned> CallRetResNo) {
- LLVMContext &Ctx = *getContext();
- EVT VT = Node->getValueType(0);
- unsigned NumResults = Node->getNumValues();
-
+ EVT CallVT, std::optional<unsigned> CallRetResNo) {
if (LC == RTLIB::UNKNOWN_LIBCALL)
return false;
- const char *LCName = TLI->getLibcallName(LC);
- if (!LCName)
+ EVT VT = Node->getValueType(0);
+
+ RTLIB::LibcallImpl Impl = TLI->getLibcallImpl(LC);
+ if (Impl == RTLIB::Unsupported)
return false;
+ StringRef LCName = TLI->getLibcallImplName(Impl);
+
+ // FIXME: This should not use TargetLibraryInfo. There should be
+ // RTLIB::Libcall entries for each used vector type, and directly matched.
auto getVecDesc = [&]() -> VecDesc const * {
for (bool Masked : {false, true}) {
if (VecDesc const *VD = getLibInfo().getVectorMappingInfo(
@@ -2539,9 +2541,34 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
// For vector types, we must find a vector mapping for the libcall.
VecDesc const *VD = nullptr;
- if (VT.isVector() && !(VD = getVecDesc()))
+ if (VT.isVector() && !CallVT.isVector() && !(VD = getVecDesc()))
return false;
+ bool IsMasked = (VD && VD->isMasked()) ||
+ RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(Impl);
+
+ // This wrapper function exists because getVectorMappingInfo works in terms of
+ // function names instead of RTLIB enums.
+
+ // FIXME: If we used a vector mapping, this assumes the calling convention of
+ // the vector function is the same as the scalar.
+
+ StringRef Name = VD ? VD->getVectorFnName() : LCName;
+
+ return expandMultipleResultFPLibCall(Name,
+ TLI->getLibcallImplCallingConv(Impl),
+ Node, Results, CallRetResNo, IsMasked);
+}
+
+// FIXME: This belongs in TargetLowering
+bool SelectionDAG::expandMultipleResultFPLibCall(
+ StringRef Name, CallingConv::ID CC, SDNode *Node,
+ SmallVectorImpl<SDValue> &Results, std::optional<unsigned> CallRetResNo,
+ bool IsMasked) {
+ LLVMContext &Ctx = *getContext();
+ EVT VT = Node->getValueType(0);
+ unsigned NumResults = Node->getNumValues();
+
// Find users of the node that store the results (and share input chains). The
// destination pointers can be used instead of creating stack allocations.
SDValue StoresInChain;
@@ -2599,7 +2626,7 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
SDLoc DL(Node);
// Pass the vector mask (if required).
- if (VD && VD->isMasked()) {
+ if (IsMasked) {
EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), Ctx, VT);
SDValue Mask = getBoolConstant(true, DL, MaskVT, VT);
Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
@@ -2609,11 +2636,11 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
: Type::getVoidTy(Ctx);
SDValue InChain = StoresInChain ? StoresInChain : getEntryNode();
- SDValue Callee = getExternalSymbol(VD ? VD->getVectorFnName().data() : LCName,
- TLI->getPointerTy(getDataLayout()));
+ SDValue Callee =
+ getExternalSymbol(Name.data(), TLI->getPointerTy(getDataLayout()));
TargetLowering::CallLoweringInfo CLI(*this);
- CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
- TLI->getLibcallCallingConv(LC), RetType, Callee, std::move(Args));
+ CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(CC, RetType, Callee,
+ std::move(Args));
auto [Call, CallChain] = TLI->LowerCallTo(CLI);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 05a854a0bf3fa..5bce539c45341 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -635,6 +635,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FROUNDEVEN, VT, Action);
setOperationAction(ISD::FTRUNC, VT, Action);
setOperationAction(ISD::FLDEXP, VT, Action);
+ setOperationAction(ISD::FSINCOSPI, VT, Action);
};
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincospi.error.ll b/llvm/test/CodeGen/AArch64/llvm.sincospi.error.ll
new file mode 100644
index 0000000000000..d074d9ae24641
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm.sincospi.error.ll
@@ -0,0 +1,13 @@
+; RUN: not llc -mtriple=aarch64-gnu-linux -filetype=null %s 2>&1 | FileCheck %s
+
+; CHECK: error: no libcall available for fsincospi
+define { float, float } @test_sincospi_f32(float %a) {
+ %result = call { float, float } @llvm.sincospi.f32(float %a)
+ ret { float, float } %result
+}
+
+; CHECK: error: no libcall available for fsincospi
+define { double, double } @test_sincospi_f64(double %a) {
+ %result = call { double, double } @llvm.sincospi.f64(double %a)
+ ret { double, double } %result
+}
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincospi.ll b/llvm/test/CodeGen/AArch64/llvm.sincospi.ll
index d1d7d92adc05a..b386df077c09d 100644
--- a/llvm/test/CodeGen/AArch64/llvm.sincospi.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.sincospi.ll
@@ -1,268 +1,250 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=arm64-apple-macosx10.9 < %s | FileCheck %s
-define { half, half } @test_sincospi_f16(half %a) {
+define { half, half } @test_sincospi_f16(half %a) #0 {
; CHECK-LABEL: test_sincospi_f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: add x0, sp, #12
; CHECK-NEXT: add x1, sp, #8
-; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: bl ___sincospif
; CHECK-NEXT: ldp s1, s0, [sp, #8]
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: fcvt h1, s1
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
%result = call { half, half } @llvm.sincospi.f16(half %a)
ret { half, half } %result
}
-define half @test_sincospi_f16_only_use_sin(half %a) {
+define half @test_sincospi_f16_only_use_sin(half %a) #0 {
; CHECK-LABEL: test_sincospi_f16_only_use_sin:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: add x0, sp, #12
; CHECK-NEXT: add x1, sp, #8
-; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: bl ___sincospif
; CHECK-NEXT: ldr s0, [sp, #12]
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-NEXT: fcvt h0, s0
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
%result = call { half, half } @llvm.sincospi.f16(half %a)
%result.0 = extractvalue { half, half } %result, 0
ret half %result.0
}
-define half @test_sincospi_f16_only_use_cos(half %a) {
+define half @test_sincospi_f16_only_use_cos(half %a) #0 {
; CHECK-LABEL: test_sincospi_f16_only_use_cos:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: add x0, sp, #12
; CHECK-NEXT: add x1, sp, #8
-; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: bl ___sincospif
; CHECK-NEXT: ldr s0, [sp, #8]
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-NEXT: fcvt h0, s0
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
%result = call { half, half } @llvm.sincospi.f16(half %a)
%result.1 = extractvalue { half, half } %result, 1
ret half %result.1
}
-define { <2 x half>, <2 x half> } @test_sincospi_v2f16(<2 x half> %a) {
+define { <2 x half>, <2 x half> } @test_sincospi_v2f16(<2 x half> %a) #0 {
; CHECK-LABEL: test_sincospi_v2f16:
-; CHECK: // %bb.0:
+; CHECK: ; %bb.0:
; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h1, v0.h[1]
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: add x0, sp, #36
-; CHECK-NEXT: add x1, sp, #32
-; CHECK-NEXT: fcvt s0, h1
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0[1]
+; CHECK-NEXT: str q0, [sp] ; 16-byte Folded Spill
; CHECK-NEXT: add x0, sp, #28
; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #20
+; CHECK-NEXT: add x1, sp, #16
; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: mov h0, v0[2]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
; CHECK-NEXT: add x0, sp, #44
; CHECK-NEXT: add x1, sp, #40
-; CHECK-NEXT: mov h0, v0.h[2]
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #60
-; CHECK-NEXT: add x1, s...
[truncated]
|
ilovepi
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, though I'm far from familiar w/ the platform implications, and would leave that decision making to other reviewers. The code changes look fine to me, and align with changes @arsenm has discussed publicly for some time now.
2d1bd67 to
b13fb13
Compare
ca4a71a to
21d3764
Compare
Add libcall entries for sleef and armpl sincospi implementations. This is the start of adding the vector library functions; eventually they should all be tracked here. I'm starting with this case because this is a prerequisite to fix reporting sincospi calls which do not exist on any common targets without regressing vector codegen when these libraries are available.
sincospi/sincospif/sincospil does not appear to exist on common targets. Darwin targets have __sincospi and __sincospif, so define and use those implementations. I have no idea what version added those calls, so I'm just guessing it's the same conditions as __sincos_stret. Most of this patch is working to preserve codegen when a vector library is explicitly enabled. This only covers sleef and armpl, as those are the only cases tested. The multiple result libcalls have an aberrant process where the legalizer looks for the scalar type's libcall in RuntimeLibcalls, and then cross references TargetLibraryInfo to find a matching vector call. This was unworkable in the sincospi case, since the common case is there is no scalar call available. To preserve codegen if the call is available, first try to match a libcall with the vector type before falling back on the old scalar search. Eventually all of this logic should be contained in RuntimeLibcalls, without the link to TargetLibraryInfo. In principle we should perform the same legalization logic as for an ordinary operation, trying to find a matching subvector type with a libcall.
21d3764 to
7dced40
Compare
b13fb13 to
500dbae
Compare
| @@ -0,0 +1,25 @@ | |||
| ; XFAIL: * | |||
| ; FIXME: asserts | |||
| ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-gnu-linux -filetype=null \ | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi @arsenm
With EXPENSIVE_CHECKS this testcase hangs and eventually times out so the XFAIL doesn't help.
It gets stuck in DAGTypeLegalizer::PerformExpensiveChecks().
Can also be seen with a normal build if you add -enable-legalize-types-checking to the llc command.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.

sincospi/sincospif/sincospil does not appear to exist on common
targets. Darwin targets have __sincospi and __sincospif, so define
and use those implementations. I have no idea what version added
those calls, so I'm just guessing it's the same conditions as
__sincos_stret.
Most of this patch is working to preserve codegen when a vector
library is explicitly enabled. This only covers sleef and armpl,
as those are the only cases tested.
The multiple result libcalls have an aberrant process where the
legalizer looks for the scalar type's libcall in RuntimeLibcalls,
and then cross references TargetLibraryInfo to find a matching
vector call. This was unworkable in the sincospi case, since the
common case is there is no scalar call available. To preserve
codegen if the call is available, first try to match a libcall
with the vector type before falling back on the old scalar search.
Eventually all of this logic should be contained in RuntimeLibcalls,
without the link to TargetLibraryInfo. In principle we should perform
the same legalization logic as for an ordinary operation, trying
to find a matching subvector type with a libcall.