From edc15e0e05efaeccafb197d4cc491db6a4763e55 Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Sun, 28 Sep 2025 23:01:16 +0700 Subject: [PATCH] [FPEnv][SDAG] Implement FNEARBYINT with optional chain This change replaces the DAG node STRICT_FNEARBYINT with a modified FNEARBYINT, which uses the optional chain property. The modified node can be used in both strictfp and default environments. This approach is based on the assumption that a floating-point operation is fundamentally the same in strictfp and default environments and is therefore lowered in an almost identical manner. Indeed, all targets but one lower STRICT_FNEARBYINT using the same action as for FNEARBYINT. The only exception is PowerPC: it lowers STRICT_FNEARBYINT for vector types using `Expand`, even though FNEARBYINT for v4f32 and v2f64 is legal. This change implements the lowering uniformly, treating these vector types as legal for the PowerPC target. The change demonstrates the transition from using separate nodes for strictfp and default environments to using a single node with an optional chain. It also modifies some methods of DAG functions required to support such nodes. 
--- llvm/include/llvm/CodeGen/ISDOpcodes.h | 1 - llvm/include/llvm/CodeGen/TargetLowering.h | 2 + llvm/include/llvm/IR/ConstrainedOps.def | 9 +- .../include/llvm/Target/TargetSelectionDAG.td | 7 +- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 24 ++- .../SelectionDAG/LegalizeFloatTypes.cpp | 114 +++++++++++- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 7 + .../SelectionDAG/LegalizeVectorOps.cpp | 23 ++- .../SelectionDAG/LegalizeVectorTypes.cpp | 103 ++++++++++- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1 + .../SelectionDAG/SelectionDAGBuilder.cpp | 4 + .../SelectionDAG/SelectionDAGDumper.cpp | 1 - .../CodeGen/SelectionDAG/TargetLowering.cpp | 18 +- llvm/lib/CodeGen/TargetLoweringBase.cpp | 1 + .../Target/AArch64/AArch64ISelLowering.cpp | 7 +- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 6 +- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1 - llvm/lib/Target/PowerPC/PPCInstrVSX.td | 2 +- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 13 +- llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td | 6 +- .../Target/SystemZ/SystemZISelLowering.cpp | 3 - llvm/lib/Target/SystemZ/SystemZInstrFP.td | 6 +- llvm/lib/Target/SystemZ/SystemZInstrVector.td | 2 +- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 4 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 5 - llvm/test/CodeGen/PowerPC/fp-strict-round.ll | 140 +-------------- .../vector-constrained-fp-intrinsics.ll | 162 ++---------------- .../vector-constrained-fp-intrinsics.ll | 8 +- 28 files changed, 335 insertions(+), 345 deletions(-) diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index c76c83d84b3c7..e05a67ae0655f 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -448,7 +448,6 @@ enum NodeType { STRICT_FLOG10, STRICT_FLOG2, STRICT_FRINT, - STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM, STRICT_FCEIL, diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 73f2c55a71125..0d140ac745f67 
100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1341,6 +1341,8 @@ class LLVM_ABI TargetLoweringBase { unsigned EqOpc; switch (Op) { default: llvm_unreachable("Unexpected FP pseudo-opcode"); +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::DAGN: EqOpc = ISD::DAGN; break; #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break; #define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ diff --git a/llvm/include/llvm/IR/ConstrainedOps.def b/llvm/include/llvm/IR/ConstrainedOps.def index 30a82bf633d57..76bed326a3e50 100644 --- a/llvm/include/llvm/IR/ConstrainedOps.def +++ b/llvm/include/llvm/IR/ConstrainedOps.def @@ -39,6 +39,12 @@ #define CMP_INSTRUCTION(N,A,R,I,D) DAG_INSTRUCTION(N,A,R,I,D) #endif +// FP_OPERATION is the same as DAG_FUNCTION, but in the DAG it is represented by +// the same node as the non-constrained function. +#ifndef FP_OPERATION +#define FP_OPERATION(N,A,R,I,D) DAG_FUNCTION(N,A,R,I,D) +#endif + // Arguments of the entries are: // - instruction or intrinsic function name. // - Number of original instruction/intrinsic arguments. 
@@ -91,7 +97,7 @@ DAG_FUNCTION(maxnum, 2, 0, experimental_constrained_maxnum, FMAXNUM DAG_FUNCTION(minnum, 2, 0, experimental_constrained_minnum, FMINNUM) DAG_FUNCTION(maximum, 2, 0, experimental_constrained_maximum, FMAXIMUM) DAG_FUNCTION(minimum, 2, 0, experimental_constrained_minimum, FMINIMUM) -DAG_FUNCTION(nearbyint, 1, 1, experimental_constrained_nearbyint, FNEARBYINT) +FP_OPERATION(nearbyint, 1, 1, experimental_constrained_nearbyint, FNEARBYINT) DAG_FUNCTION(pow, 2, 1, experimental_constrained_pow, FPOW) DAG_FUNCTION(powi, 2, 1, experimental_constrained_powi, FPOWI) DAG_FUNCTION(ldexp, 2, 1, experimental_constrained_ldexp, FLDEXP) @@ -114,3 +120,4 @@ FUNCTION(fmuladd, 3, 1, experimental_constrained_fmuladd) #undef CMP_INSTRUCTION #undef DAG_INSTRUCTION #undef DAG_FUNCTION +#undef FP_OPERATION diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 632be7ad9e350..8edfb59455402 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -575,7 +575,7 @@ def frint : SDNode<"ISD::FRINT" , SDTFPUnaryOp>; def ftrunc : SDNode<"ISD::FTRUNC" , SDTFPUnaryOp>; def fceil : SDNode<"ISD::FCEIL" , SDTFPUnaryOp>; def ffloor : SDNode<"ISD::FFLOOR" , SDTFPUnaryOp>; -def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp>; +def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp, [SDNPMayHaveChain]>; def fround : SDNode<"ISD::FROUND" , SDTFPUnaryOp>; def froundeven : SDNode<"ISD::FROUNDEVEN" , SDTFPUnaryOp>; @@ -653,8 +653,6 @@ def strict_lrint : SDNode<"ISD::STRICT_LRINT", SDTFPToIntOp, [SDNPHasChain]>; def strict_llrint : SDNode<"ISD::STRICT_LLRINT", SDTFPToIntOp, [SDNPHasChain]>; -def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT", - SDTFPUnaryOp, [SDNPHasChain]>; def strict_fceil : SDNode<"ISD::STRICT_FCEIL", SDTFPUnaryOp, [SDNPHasChain]>; def strict_ffloor : SDNode<"ISD::STRICT_FFLOOR", @@ -1704,9 +1702,6 @@ def any_lrint : PatFrags<(ops node:$src), 
def any_llrint : PatFrags<(ops node:$src), [(strict_llrint node:$src), (llrint node:$src)]>; -def any_fnearbyint : PatFrags<(ops node:$src), - [(strict_fnearbyint node:$src), - (fnearbyint node:$src)]>; def any_fceil : PatFrags<(ops node:$src), [(strict_fceil node:$src), (fceil node:$src)]>; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 5fb7e63cfb605..72fc5a7570ce7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2196,7 +2196,7 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, if (LC == RTLIB::UNKNOWN_LIBCALL) llvm_unreachable("Can't create an unknown libcall!"); - if (Node->isStrictFPOpcode()) { + if (Node->isStrictFPOpcode() || (Node->hasChain() && Node->isFPOperation())) { EVT RetVT = Node->getValueType(0); SmallVector Ops(drop_begin(Node->ops())); TargetLowering::MakeLibCallOptions CallOptions; @@ -4791,7 +4791,6 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::RINT_PPCF128, Results); break; case ISD::FNEARBYINT: - case ISD::STRICT_FNEARBYINT: ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, @@ -5760,7 +5759,6 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FFLOOR: case ISD::FCEIL: case ISD::FRINT: - case ISD::FNEARBYINT: case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FTRUNC: @@ -5792,7 +5790,6 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FCEIL: case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: case ISD::STRICT_FROUND: case ISD::STRICT_FROUNDEVEN: case ISD::STRICT_FTRUNC: @@ -5821,6 +5818,25 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(Tmp3); Results.push_back(Tmp3.getValue(1)); break; + case ISD::FNEARBYINT: + if (Node->hasChain()) { + Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); 
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, + {Tmp1.getValue(1), Tmp1}); + Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, + {Tmp2.getValue(1), Tmp2, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}); + Results.push_back(Tmp3); + Results.push_back(Tmp3.getValue(1)); + } else { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); + } + break; case ISD::BUILD_VECTOR: { MVT EltVT = OVT.getVectorElementType(); MVT NewEltVT = NVT.getVectorElementType(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 437d0f4654096..33d133ff9cba5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -115,7 +115,6 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::STRICT_FMUL: case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; - case ISD::STRICT_FNEARBYINT: case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; case ISD::STRICT_FP_EXTEND: @@ -227,6 +226,32 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC) { return Tmp.first; } +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOperation(SDNode *N, + RTLIB::Libcall LC) { + bool HasChain = N->hasChain(); + assert(N->getNumValues() == 1 + HasChain && + "multiple result is not supported yet"); + SDValue Chain = HasChain ? 
N->getOperand(0) : SDValue(); + SmallVector Ops; + SmallVector OpsVT; + + for (unsigned i = HasChain, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + OpsVT.push_back(Op.getValueType()); + Op = GetSoftenedFloat(Op); + Ops.push_back(Op); + } + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0)); + std::pair Tmp = + TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, SDLoc(N), Chain); + if (HasChain) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } @@ -582,7 +607,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { - return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + return SoftenFloatRes_FPOperation(N, GetFPLibCall(N->getValueType(0), RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, @@ -1596,7 +1621,6 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break; case ISD::STRICT_FMUL: case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; - case ISD::STRICT_FNEARBYINT: case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; case ISD::STRICT_FP_EXTEND: @@ -1688,6 +1712,21 @@ void DAGTypeLegalizer::ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC, GetPairElements(Tmp.first, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FPOperation(SDNode *N, RTLIB::Libcall LC, + SDValue &Lo, SDValue &Hi) { + bool HasChain = N->hasChain(); + SDValue Chain = HasChain ? N->getOperand(0) : SDValue(); + assert(N->getNumValues() == 1 + HasChain && + "multiple result is not supported yet"); + SmallVector Ops(HasChain ? 
llvm::drop_begin(N->ops()) : N->ops()); + TargetLowering::MakeLibCallOptions CallOptions; + std::pair Tmp = TLI.makeLibCall( + DAG, LC, N->getValueType(0), Ops, CallOptions, SDLoc(N), Chain); + if (HasChain) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + GetPairElements(Tmp.first, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FMODF(SDNode *N) { ExpandFloatRes_UnaryWithTwoFPResults(N, RTLIB::getMODF(N->getValueType(0)), /*CallRetResNo=*/0); @@ -1951,7 +1990,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi) { - ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + ExpandFloatRes_FPOperation(N, GetFPLibCall(N->getValueType(0), RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, @@ -2827,6 +2866,11 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { R = PromoteFloatRes_EXTRACT_VECTOR_ELT(N); break; case ISD::FCOPYSIGN: R = PromoteFloatRes_FCOPYSIGN(N); break; + // Floating-point operations with optional chain. + case ISD::FNEARBYINT: + R = PromoteFloatRes_FPOperation(N); + break; + // Unary FP Operations case ISD::FABS: case ISD::FACOS: @@ -2843,7 +2887,6 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: case ISD::FLOG2: case ISD::FLOG10: - case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FRINT: case ISD::FROUND: @@ -3071,6 +3114,29 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) { return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags()); } +SDValue DAGTypeLegalizer::PromoteFloatRes_FPOperation(SDNode *N) { + bool HasChain = N->hasChain(); + SDValue Chain = HasChain ? 
N->getOperand(0) : SDValue(); + assert(N->getNumValues() == 1 + HasChain && + "multiple result is not supported yet"); + SmallVector Ops; + + if (HasChain) + Ops.push_back(Chain); + for (unsigned i = HasChain, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + // FIXME Use strict conversions for strict operations. + Op = GetPromotedFloat(Op); + Ops.push_back(Op); + } + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Ops); + if (HasChain) + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) { EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -3312,6 +3378,11 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: R = SoftPromoteHalfRes_FP_ROUND(N); break; + // Floating-point operations with optional chain. + case ISD::FNEARBYINT: + R = SoftPromoteHalfRes_FPOperation(N); + break; + // Unary FP Operations case ISD::FACOS: case ISD::FASIN: @@ -3327,7 +3398,6 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: case ISD::FLOG2: case ISD::FLOG10: - case ISD::FNEARBYINT: case ISD::FREEZE: case ISD::FRINT: case ISD::FROUND: @@ -3714,6 +3784,38 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BinOp(SDNode *N) { return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FPOperation(SDNode *N) { + SDLoc dl(N); + bool HasChain = N->hasChain(); + assert(N->getNumValues() == 1 + HasChain && + "multiple result is not supported yet"); + SDValue Chain = HasChain ? 
N->getOperand(0) : SDValue(); + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); + auto PromotionOpcode = GetPromotionOpcode(OVT, NVT); + + SmallVector Ops; + if (HasChain) + Ops.push_back(Chain); + for (unsigned i = HasChain, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = GetSoftPromotedHalf(N->getOperand(i)); + // FIXME Use strict conversions for strict operations. + Op = DAG.getNode(PromotionOpcode, dl, NVT, Op); + Ops.push_back(Op); + } + + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Ops); + if (HasChain) + Chain = Res.getValue(1); + + // Convert back to FP16 as an integer. + Res = DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); + + if (HasChain) + ReplaceValueWith(SDValue(N, 1), Chain); + return Res; +} + SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE(SDNode *N) { // Expand and soften recursively. ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG)); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 603dc34ce72a7..541977c7dad03 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -573,6 +573,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { bool SoftenFloatRes_UnaryWithTwoFPResults( SDNode *N, RTLIB::Libcall LC, std::optional CallRetResNo = {}); SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC); + SDValue SoftenFloatRes_FPOperation(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_ARITH_FENCE(SDNode *N); SDValue SoftenFloatRes_BITCAST(SDNode *N); @@ -681,6 +682,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue &Lo, SDValue &Hi); void ExpandFloatRes_UnaryWithTwoFPResults( SDNode *N, RTLIB::Libcall LC, std::optional CallRetResNo = {}); + void ExpandFloatRes_FPOperation(SDNode *N, RTLIB::Libcall LC, SDValue &Lo, + SDValue &Hi); // clang-format off void 
ExpandFloatRes_AssertNoFPClass(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -788,6 +791,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N); SDValue PromoteFloatRes_VECREDUCE(SDNode *N); SDValue PromoteFloatRes_VECREDUCE_SEQ(SDNode *N); + SDValue PromoteFloatRes_FPOperation(SDNode *N); bool PromoteFloatOperand(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); @@ -839,6 +843,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftPromoteHalfRes_UNDEF(SDNode *N); SDValue SoftPromoteHalfRes_VECREDUCE(SDNode *N); SDValue SoftPromoteHalfRes_VECREDUCE_SEQ(SDNode *N); + SDValue SoftPromoteHalfRes_FPOperation(SDNode *N); bool SoftPromoteHalfOperand(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_BITCAST(SDNode *N); @@ -881,6 +886,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_OverflowOp(SDNode *N, unsigned ResNo); SDValue ScalarizeVecRes_InregOp(SDNode *N); SDValue ScalarizeVecRes_VecInregOp(SDNode *N); + SDValue ScalarizeVecRes_FPOperation(SDNode *N); SDValue ScalarizeVecRes_ADDRSPACECAST(SDNode *N); SDValue ScalarizeVecRes_BITCAST(SDNode *N); @@ -965,6 +971,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FPOperation(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 8e423c4f83b38..3abc8594f3d17 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -319,9 +319,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (Action == TargetLowering::Legal) Action = TargetLowering::Expand; break; +#define FP_OPERATION(NAME, NARG, ROUND_MODE, 
INTRINSIC, DAGN) case ISD::DAGN: #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" + if (!Node->hasChain()) { + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + break; + } ValVT = Node->getValueType(0); if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP || Op.getOpcode() == ISD::STRICT_UINT_TO_FP) @@ -435,7 +440,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FCEIL: case ISD::FTRUNC: case ISD::FRINT: - case ISD::FNEARBYINT: case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FFLOOR: @@ -1218,11 +1222,20 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { case ISD::SDIVFIXSAT: case ISD::UDIVFIXSAT: break; +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::DAGN: #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" - ExpandStrictFPOp(Node, Results); - return; + if (Node->hasChain()) { + ExpandStrictFPOp(Node, Results); + return; + } + if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: case ISD::VECREDUCE_AND: @@ -1305,7 +1318,6 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { case ISD::FDIV: case ISD::FCEIL: case ISD::FFLOOR: - case ISD::FNEARBYINT: case ISD::FRINT: case ISD::FROUND: case ISD::FROUNDEVEN: @@ -1318,6 +1330,9 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { break; } + if (Node->hasChain()) + return UnrollStrictFPOp(Node, Results); + SDValue Unrolled = DAG.UnrollVectorOp(Node); if (Node->getNumValues() == 1) { Results.push_back(Unrolled); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 3b5f83f7c089a..b312396a83359 100644 --- 
a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -85,6 +85,10 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::ZERO_EXTEND_VECTOR_INREG: R = ScalarizeVecRes_VecInregOp(N); break; +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) case ISD::DAGN: +#include "llvm/IR/ConstrainedOps.def" + R = ScalarizeVecRes_FPOperation(N); + break; case ISD::ABS: case ISD::ANY_EXTEND: case ISD::BITREVERSE: @@ -108,7 +112,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: case ISD::FLOG10: case ISD::FLOG2: - case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FREEZE: case ISD::ARITH_FENCE: @@ -211,6 +214,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_TernaryOp(N); break; +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" @@ -513,6 +517,41 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op, N->getFlags()); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOperation(SDNode *N) { + SDLoc DL(N); + bool HasChain = N->hasChain(); + assert(N->getNumValues() == 1 + HasChain && + "multiple result is not supported yet"); + SDValue Chain = HasChain ? 
N->getOperand(0) : SDValue(); + + SmallVector Ops; + if (HasChain) + Ops.push_back(Chain); + for (unsigned i = HasChain, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + EVT OpVT = Op.getValueType(); + if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + Op = GetScalarizedVector(Op); + } else { + EVT VT = OpVT.getVectorElementType(); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, + DAG.getVectorIdxConstant(0, DL)); + } + Ops.push_back(Op); + } + + EVT DestVT = N->getValueType(0).getVectorElementType(); + SDValue Result; + if (HasChain) { + Result = DAG.getNode(N->getOpcode(), DL, {DestVT, MVT::Other}, Ops, + N->getFlags()); + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + } else { + Result = DAG.getNode(N->getOpcode(), DL, DestVT, Ops, N->getFlags()); + } + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { EVT EltVT = N->getValueType(0).getVectorElementType(); EVT ExtVT = cast(N->getOperand(1))->getVT().getVectorElementType(); @@ -1262,6 +1301,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_ExtVecInRegOp(N, Lo, Hi); break; +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) case ISD::DAGN: +#include "llvm/IR/ConstrainedOps.def" + SplitVecRes_FPOperation(N, Lo, Hi); + break; + case ISD::ABS: case ISD::VP_ABS: case ISD::BITREVERSE: @@ -1294,7 +1338,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: case ISD::FLOG10: case ISD::FLOG2: - case ISD::FNEARBYINT: case ISD::VP_FNEARBYINT: case ISD::FNEG: case ISD::VP_FNEG: case ISD::FREEZE: @@ -1421,6 +1464,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_CMP(N, Lo, Hi); break; +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" @@ -2065,6 +2109,45 @@ void 
DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo, } } +void DAGTypeLegalizer::SplitVecRes_FPOperation(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + bool HasChain = N->hasChain(); + assert(N->getNumValues() == 1 + HasChain && + "multiple result is not supported yet"); + SDValue Chain = HasChain ? N->getOperand(0) : SDValue(); + + SmallVector OperandsLo; + SmallVector OperandsHi; + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + if (HasChain) { + OperandsLo.push_back(Chain); + OperandsHi.push_back(Chain); + } + for (unsigned i = HasChain, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + SDValue LHSLo, LHSHi; + GetSplitVector(Op, LHSLo, LHSHi); + OperandsLo.push_back(LHSLo); + OperandsHi.push_back(LHSHi); + } + SDNodeFlags Flags = N->getFlags(); + unsigned Opcode = N->getOpcode(); + if (HasChain) { + Lo = DAG.getNode(Opcode, dl, {LoVT, MVT::Other}, OperandsLo, Flags); + Hi = DAG.getNode(Opcode, dl, {HiVT, MVT::Other}, OperandsHi, Flags); + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Lo.getValue(1), Hi.getValue(1)); + ReplaceValueWith(SDValue(N, 1), Chain); + } else { + Lo = DAG.getNode(Opcode, dl, LoVT, OperandsLo, Flags); + Hi = DAG.getNode(Opcode, dl, HiVT, OperandsHi, Flags); + } +} + void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); @@ -4988,6 +5071,21 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryWithExtraScalarOp(N); break; + +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) case ISD::DAGN: +#include "llvm/IR/ConstrainedOps.def" + if (N->hasChain()) + Res = WidenVecRes_StrictFP(N); + else if (N->getNumOperands() == 1) { + if (unrollExpandedOp()) + break; + Res = WidenVecRes_Unary(N); + } else { + llvm_unreachable("not supported yet"); + } + break; + +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) 
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" @@ -5073,7 +5171,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: case ISD::FLOG10: case ISD::FLOG2: - case ISD::FNEARBYINT: case ISD::FRINT: case ISD::FROUND: case ISD::FROUNDEVEN: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 08af74c258899..3b37ce1902596 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -11663,6 +11663,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { switch (OrigOpc) { default: llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); +#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break; #define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c21890a0d856f..4dc4f3fd16863 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8362,6 +8362,10 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( unsigned Opcode; switch (FPI.getIntrinsicID()) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case Intrinsic::INTRINSIC: \ + Opcode = ISD::DAGN; \ + break; #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case Intrinsic::INTRINSIC: \ Opcode = ISD::STRICT_##DAGN; \ diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index fcfbfe6c461d3..bdb84c4417949 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -251,7 +251,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FRINT: return "frint"; case ISD::STRICT_FRINT: return "strict_frint"; case ISD::FNEARBYINT: return "fnearbyint"; - case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint"; case ISD::FROUND: return "fround"; case ISD::STRICT_FROUND: return "strict_fround"; case ISD::FROUNDEVEN: return "froundeven"; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index cc503d324e74b..e6f996f7b549e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -12312,8 +12312,16 @@ SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node, if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT)) return SDValue(); + bool HasChain = Node->hasChain(); + SDValue Chain = HasChain ? 
Node->getOperand(0) : SDValue(); + SmallVector LoOps, HiOps; - for (const SDValue &V : Node->op_values()) { + if (HasChain) { + LoOps.push_back(Chain); + HiOps.push_back(Chain); + } + for (unsigned i = HasChain, e = Node->getNumOperands(); i != e; ++i) { + SDValue V = Node->getOperand(i); auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT); LoOps.push_back(Lo); HiOps.push_back(Hi); @@ -12321,7 +12329,13 @@ SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node, SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps); SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps); - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi); + SDValue R = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi); + if (HasChain) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + SplitOpLo.getValue(1), SplitOpHi.getValue(1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + } + return R; } SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT, diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index c23281a820b2b..b5d2153d1837e 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -869,6 +869,7 @@ void TargetLoweringBase::initActions() { VT, Expand); // Constrained floating-point operations default to expand. 
+#define FP_OPERATION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ setOperationAction(ISD::STRICT_##DAGN, VT, Expand); #include "llvm/IR/ConstrainedOps.def" diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 69651168f8539..a23c1d5d6bd37 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -821,7 +821,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, ISD::STRICT_FFLOOR, ISD::STRICT_FSQRT, ISD::STRICT_FRINT, - ISD::STRICT_FNEARBYINT, ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN, @@ -909,7 +908,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, ISD::FMINIMUM, ISD::FMAXIMUM, ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE, - ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FNEARBYINT, + ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN, ISD::STRICT_FROUND, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_LROUND, @@ -1253,7 +1252,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FSQRT, ISD::STRICT_FRINT, - ISD::STRICT_FNEARBYINT, ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, + ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM}) setOperationAction(Op, MVT::v1f64, Expand); @@ -1408,7 +1407,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, for (auto Op : {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::FROUND, ISD::FROUNDEVEN, ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE, - 
ISD::STRICT_FFLOOR, ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL, + ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN}) { for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index f788c7510f80c..03bd5a9e9b615 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5465,7 +5465,7 @@ defm FABS : SingleOperandFPDataNoException<0b0001, "fabs", fabs>; defm FMOV : SingleOperandFPDataNoException<0b0000, "fmov">; defm FNEG : SingleOperandFPDataNoException<0b0010, "fneg", fneg>; defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>; -defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>; +defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>; defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>; defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>; defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>; @@ -5865,7 +5865,7 @@ def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>; defm FNEG : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>; defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>; -defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>; +defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>; defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>; defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>; defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>; @@ -11056,7 +11056,7 @@ multiclass PromoteUnaryv8f16Tov4f32 } defm : PromoteUnaryv8f16Tov4f32; defm : PromoteUnaryv8f16Tov4f32; -defm : PromoteUnaryv8f16Tov4f32; +defm : PromoteUnaryv8f16Tov4f32; defm : PromoteUnaryv8f16Tov4f32; defm : 
PromoteUnaryv8f16Tov4f32; defm : PromoteUnaryv8f16Tov4f32; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 944a1e2e6fa17..a41485ab1192e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1271,7 +1271,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal); diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 979ba31b0431b..1d166b487e0c6 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -3882,7 +3882,7 @@ def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)), (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>; // Use current rounding mode -def : Pat<(f128 (any_fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>; +def : Pat<(f128 (fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>; // Round to nearest, ties away from zero def : Pat<(f128 (any_fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>; // Round towards Zero diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 7123a2d706787..7057aa082fbfd 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -539,7 +539,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have // complete support for all operations in LegalizeDAG. 
setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, - ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT, + ISD::STRICT_FRINT, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN, ISD::STRICT_FTRUNC, ISD::STRICT_FLDEXP}, MVT::f16, Promote); @@ -1129,7 +1129,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND, - ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT}, + ISD::STRICT_FROUNDEVEN}, VT, Custom); setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom); @@ -1539,7 +1539,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND, - ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT}, + ISD::STRICT_FROUNDEVEN}, VT, Custom); } @@ -3482,7 +3482,7 @@ lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL); break; - case ISD::STRICT_FNEARBYINT: + case ISD::FNEARBYINT: Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL, DAG.getVTList(ContainerVT, MVT::Other), Chain, Src, Mask, VL); @@ -3491,7 +3491,7 @@ lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, Chain = Truncated.getValue(1); // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. 
- if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) { + if (Op.getOpcode() != ISD::FNEARBYINT) { Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL, DAG.getVTList(ContainerVT, MVT::Other), Chain, Truncated, Mask, VL); @@ -7902,6 +7902,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::FRINT: case ISD::FROUND: case ISD::FROUNDEVEN: + if (Op->hasChain()) + return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); if (isPromotedOpNeedingSplit(Op, Subtarget)) return SplitVectorOp(Op, DAG); return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); @@ -8402,7 +8404,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::STRICT_FRINT: case ISD::STRICT_FFLOOR: case ISD::STRICT_FTRUNC: - case ISD::STRICT_FNEARBYINT: case ISD::STRICT_FROUND: case ISD::STRICT_FROUNDEVEN: return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td index a2737d247fe31..fc96c5324aaa9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td @@ -251,7 +251,7 @@ def: PatFprFpr; def: Pat<(any_frint FPR32:$rs1), (FROUNDNX_S FPR32:$rs1, FRM_DYN)>; // fnearbyint is like frint but does not detect inexact conditions. -def: Pat<(any_fnearbyint FPR32:$rs1), (FROUND_S FPR32:$rs1, FRM_DYN)>; +def: Pat<(fnearbyint FPR32:$rs1), (FROUND_S FPR32:$rs1, FRM_DYN)>; def: Pat<(any_fround FPR32:$rs1), (FROUND_S FPR32:$rs1, FRM_RMM)>; def: Pat<(any_froundeven FPR32:$rs1), (FROUND_S FPR32:$rs1, FRM_RNE)>; @@ -276,7 +276,7 @@ def: PatFprFpr; def: Pat<(any_frint FPR64:$rs1), (FROUNDNX_D FPR64:$rs1, FRM_DYN)>; // fnearbyint is like frint but does not detect inexact conditions. 
-def: Pat<(any_fnearbyint FPR64:$rs1), (FROUND_D FPR64:$rs1, FRM_DYN)>; +def: Pat<(fnearbyint FPR64:$rs1), (FROUND_D FPR64:$rs1, FRM_DYN)>; def: Pat<(any_fround FPR64:$rs1), (FROUND_D FPR64:$rs1, FRM_RMM)>; def: Pat<(any_froundeven FPR64:$rs1), (FROUND_D FPR64:$rs1, FRM_RNE)>; @@ -306,7 +306,7 @@ def: PatFprFpr; def: Pat<(f16 (any_frint FPR16:$rs1)), (FROUNDNX_H FPR16:$rs1, FRM_DYN)>; // fnearbyint is like frint but does not detect inexact conditions. -def: Pat<(f16 (any_fnearbyint FPR16:$rs1)), (FROUND_H FPR16:$rs1, FRM_DYN)>; +def: Pat<(f16 (fnearbyint FPR16:$rs1)), (FROUND_H FPR16:$rs1, FRM_DYN)>; def: Pat<(f16 (any_fround FPR16:$rs1)), (FROUND_H FPR16:$rs1, FRM_RMM)>; def: Pat<(f16 (any_froundeven FPR16:$rs1)), (FROUND_H FPR16:$rs1, FRM_RNE)>; diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 3b7d11a318dc4..c0e9b419866a7 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -601,7 +601,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FRINT, VT, Legal); setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); if (Subtarget.hasFPExtension()) { - setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); setOperationAction(ISD::STRICT_FCEIL, VT, Legal); setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); @@ -653,7 +652,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); @@ -721,7 +719,6 @@ 
SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td index 33f73bc658b25..3d8a2a424e7a5 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -430,9 +430,9 @@ let Predicates = [FeatureFPExtension] in { } // fnearbyint is like frint but does not detect inexact conditions. - def : Pat<(any_fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>; - def : Pat<(any_fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>; - def : Pat<(any_fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>; + def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>; + def : Pat<(fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>; + def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>; // floor is no longer allowed to raise an inexact condition, // so restrict it to the cases where the condition can be suppressed. diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index 479bab5ce62b8..9a4930d0f77a4 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -1409,7 +1409,7 @@ let Predicates = [FeatureVector] in { // rounding modes. 
multiclass VectorRounding { def : FPConversion; - def : FPConversion; + def : FPConversion; def : FPConversion; def : FPConversion; def : FPConversion; diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 62073ec125e8f..85b12e649624f 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1258,7 +1258,6 @@ void X86DAGToDAGISel::PreprocessISelDAG() { case ISD::FROUNDEVEN: case ISD::STRICT_FROUNDEVEN: case ISD::FNEARBYINT: - case ISD::STRICT_FNEARBYINT: case ISD::FRINT: case ISD::STRICT_FRINT: { // Replace fp rounding with their X86 specific equivalent so we don't @@ -1274,7 +1273,6 @@ void X86DAGToDAGISel::PreprocessISelDAG() { case ISD::FTRUNC: Imm = 0xB; break; case ISD::STRICT_FROUNDEVEN: case ISD::FROUNDEVEN: Imm = 0x8; break; - case ISD::STRICT_FNEARBYINT: case ISD::FNEARBYINT: Imm = 0xC; break; case ISD::STRICT_FRINT: case ISD::FRINT: Imm = 0x4; break; @@ -1282,7 +1280,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { SDLoc dl(N); bool IsStrict = N->isStrictFPOpcode(); SDValue Res; - if (IsStrict) + if (IsStrict || (N->hasChain() && N->isFPOperation())) Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl, {N->getValueType(0), MVT::Other}, {N->getOperand(0), N->getOperand(1), diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index eea84a2841764..4c78891f78f3f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -705,7 +705,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FEXP2, MVT::f16, Promote); setOperationAction(ISD::STRICT_FCEIL, MVT::f16, Promote); setOperationAction(ISD::STRICT_FFLOOR, MVT::f16, Promote); - setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f16, Promote); setOperationAction(ISD::STRICT_FRINT, MVT::f16, Promote); setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Promote); 
setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Promote); @@ -1359,7 +1358,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FRINT, RoundedTy, Legal); setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal); setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal); setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal); setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal); @@ -1460,7 +1458,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FRINT, VT, Legal); setOperationAction(ISD::STRICT_FRINT, VT, Legal); setOperationAction(ISD::FNEARBYINT, VT, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); setOperationAction(ISD::FROUNDEVEN, VT, Legal); setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); @@ -1917,7 +1914,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FRINT, VT, Legal); setOperationAction(ISD::STRICT_FRINT, VT, Legal); setOperationAction(ISD::FNEARBYINT, VT, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); setOperationAction(ISD::FROUNDEVEN, VT, Legal); setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); @@ -2255,7 +2251,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FRINT, VT, Legal); setOperationAction(ISD::STRICT_FRINT, VT, Legal); setOperationAction(ISD::FNEARBYINT, VT, Legal); - setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); setOperationAction(ISD::FROUNDEVEN, VT, Legal); setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll index eac4fb6f98bf7..56e2379c7e621 100644 --- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll @@ -205,96 +205,12 @@ define double @nearbyint_f64(double %f1, double %f2) strictfp { define <4 
x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp { ; P8-LABEL: nearbyint_v4f32: ; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -176(r1) -; P8-NEXT: std r0, 192(r1) -; P8-NEXT: .cfi_def_cfa_offset 176 -; P8-NEXT: .cfi_offset lr, 16 -; P8-NEXT: .cfi_offset v29, -48 -; P8-NEXT: .cfi_offset v30, -32 -; P8-NEXT: .cfi_offset v31, -16 -; P8-NEXT: xxsldwi vs0, v2, v2, 3 -; P8-NEXT: li r3, 128 -; P8-NEXT: xscvspdpn f1, vs0 -; P8-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill -; P8-NEXT: li r3, 144 -; P8-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; P8-NEXT: li r3, 160 -; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; P8-NEXT: vmr v31, v2 -; P8-NEXT: bl nearbyintf -; P8-NEXT: nop -; P8-NEXT: xxsldwi vs0, v31, v31, 1 -; P8-NEXT: xxlor v30, f1, f1 -; P8-NEXT: xscvspdpn f1, vs0 -; P8-NEXT: bl nearbyintf -; P8-NEXT: nop -; P8-NEXT: xxmrghd vs0, vs1, v30 -; P8-NEXT: xscvspdpn f1, v31 -; P8-NEXT: xvcvdpsp v29, vs0 -; P8-NEXT: bl nearbyintf -; P8-NEXT: nop -; P8-NEXT: xxswapd vs0, v31 -; P8-NEXT: xxlor v30, f1, f1 -; P8-NEXT: xscvspdpn f1, vs0 -; P8-NEXT: bl nearbyintf -; P8-NEXT: nop -; P8-NEXT: xxmrghd vs0, v30, vs1 -; P8-NEXT: li r3, 160 -; P8-NEXT: xvcvdpsp v2, vs0 -; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; P8-NEXT: li r3, 144 -; P8-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; P8-NEXT: li r3, 128 -; P8-NEXT: vmrgew v2, v2, v29 -; P8-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload -; P8-NEXT: addi r1, r1, 176 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 +; P8-NEXT: xvrspic v2, v2 ; P8-NEXT: blr ; ; P9-LABEL: nearbyint_v4f32: ; P9: # %bb.0: -; P9-NEXT: mflr r0 -; P9-NEXT: stdu r1, -80(r1) -; P9-NEXT: std r0, 96(r1) -; P9-NEXT: .cfi_def_cfa_offset 80 -; P9-NEXT: .cfi_offset lr, 16 -; P9-NEXT: .cfi_offset v29, -48 -; P9-NEXT: .cfi_offset v30, -32 -; P9-NEXT: .cfi_offset v31, -16 -; P9-NEXT: xxsldwi vs0, v2, v2, 3 -; P9-NEXT: stxv v29, 32(r1) # 16-byte Folded Spill -; P9-NEXT: xscvspdpn f1, vs0 -; P9-NEXT: stxv 
v30, 48(r1) # 16-byte Folded Spill -; P9-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill -; P9-NEXT: vmr v31, v2 -; P9-NEXT: bl nearbyintf -; P9-NEXT: nop -; P9-NEXT: xxsldwi vs0, v31, v31, 1 -; P9-NEXT: xscpsgndp v30, f1, f1 -; P9-NEXT: xscvspdpn f1, vs0 -; P9-NEXT: bl nearbyintf -; P9-NEXT: nop -; P9-NEXT: xxmrghd vs0, vs1, v30 -; P9-NEXT: xscvspdpn f1, v31 -; P9-NEXT: xvcvdpsp v29, vs0 -; P9-NEXT: bl nearbyintf -; P9-NEXT: nop -; P9-NEXT: xxswapd vs0, v31 -; P9-NEXT: xscpsgndp v30, f1, f1 -; P9-NEXT: xscvspdpn f1, vs0 -; P9-NEXT: bl nearbyintf -; P9-NEXT: nop -; P9-NEXT: xxmrghd vs0, v30, vs1 -; P9-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload -; P9-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload -; P9-NEXT: xvcvdpsp v2, vs0 -; P9-NEXT: vmrgew v2, v2, v29 -; P9-NEXT: lxv v29, 32(r1) # 16-byte Folded Reload -; P9-NEXT: addi r1, r1, 80 -; P9-NEXT: ld r0, 16(r1) -; P9-NEXT: mtlr r0 +; P9-NEXT: xvrspic v2, v2 ; P9-NEXT: blr %res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32( <4 x float> %vf1, @@ -306,60 +222,12 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) strictfp { ; P8-LABEL: nearbyint_v2f64: ; P8: # %bb.0: -; P8-NEXT: mflr r0 -; P8-NEXT: stdu r1, -160(r1) -; P8-NEXT: std r0, 176(r1) -; P8-NEXT: .cfi_def_cfa_offset 160 -; P8-NEXT: .cfi_offset lr, 16 -; P8-NEXT: .cfi_offset v30, -32 -; P8-NEXT: .cfi_offset v31, -16 -; P8-NEXT: li r3, 128 -; P8-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; P8-NEXT: li r3, 144 -; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; P8-NEXT: vmr v31, v2 -; P8-NEXT: xxlor f1, v31, v31 -; P8-NEXT: bl nearbyint -; P8-NEXT: nop -; P8-NEXT: xxlor v30, f1, f1 -; P8-NEXT: xxswapd vs1, v31 -; P8-NEXT: bl nearbyint -; P8-NEXT: nop -; P8-NEXT: li r3, 144 -; P8-NEXT: xxmrghd v2, v30, vs1 -; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; P8-NEXT: li r3, 128 -; P8-NEXT: lxvd2x v30, r1, r3 # 16-byte 
Folded Reload -; P8-NEXT: addi r1, r1, 160 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 +; P8-NEXT: xvrdpic v2, v2 ; P8-NEXT: blr ; ; P9-LABEL: nearbyint_v2f64: ; P9: # %bb.0: -; P9-NEXT: mflr r0 -; P9-NEXT: stdu r1, -64(r1) -; P9-NEXT: std r0, 80(r1) -; P9-NEXT: .cfi_def_cfa_offset 64 -; P9-NEXT: .cfi_offset lr, 16 -; P9-NEXT: .cfi_offset v30, -32 -; P9-NEXT: .cfi_offset v31, -16 -; P9-NEXT: stxv v31, 48(r1) # 16-byte Folded Spill -; P9-NEXT: vmr v31, v2 -; P9-NEXT: xscpsgndp f1, v31, v31 -; P9-NEXT: stxv v30, 32(r1) # 16-byte Folded Spill -; P9-NEXT: bl nearbyint -; P9-NEXT: nop -; P9-NEXT: xscpsgndp v30, f1, f1 -; P9-NEXT: xxswapd vs1, v31 -; P9-NEXT: bl nearbyint -; P9-NEXT: nop -; P9-NEXT: xxmrghd v2, v30, vs1 -; P9-NEXT: lxv v31, 48(r1) # 16-byte Folded Reload -; P9-NEXT: lxv v30, 32(r1) # 16-byte Folded Reload -; P9-NEXT: addi r1, r1, 64 -; P9-NEXT: ld r0, 16(r1) -; P9-NEXT: mtlr r0 +; P9-NEXT: xvrdpic v2, v2 ; P9-NEXT: blr %res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( <2 x double> %vf1, diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll index 71c3069a406fe..cf6342726e569 100644 --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -4463,52 +4463,12 @@ entry: define <2 x double> @constrained_vector_nearbyint_v2f64(<2 x double> %x) #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v2f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: std 0, 96(1) -; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: vmr 31, 2 -; PC64LE-NEXT: xxlor 1, 63, 63 -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: xxlor 62, 1, 1 -; PC64LE-NEXT: xxswapd 1, 63 -; PC64LE-NEXT: bl nearbyint -; 
PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: xxmrghd 34, 62, 1 -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addi 1, 1, 80 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: xvrdpic 34, 34 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v2f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: stdu 1, -64(1) -; PC64LE9-NEXT: std 0, 80(1) -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 31, 2 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 -; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscpsgndp 62, 1, 1 -; PC64LE9-NEXT: xxswapd 1, 63 -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xxmrghd 34, 62, 1 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: addi 1, 1, 64 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: xvrdpic 34, 34 ; PC64LE9-NEXT: blr entry: %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( @@ -4611,32 +4571,21 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 { ; PC64LE-LABEL: constrained_vector_nearby_v3f64: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: stdu 1, -80(1) +; PC64LE-NEXT: stdu 1, -64(1) +; PC64LE-NEXT: xxmrghd 0, 2, 1 +; PC64LE-NEXT: fmr 1, 3 ; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: std 0, 96(1) -; PC64LE-NEXT: stfd 30, 64(1) # 8-byte Folded Spill -; PC64LE-NEXT: fmr 30, 2 -; PC64LE-NEXT: stfd 31, 72(1) # 8-byte Folded Spill -; PC64LE-NEXT: fmr 31, 3 +; PC64LE-NEXT: std 0, 80(1) ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: xxlor 63, 1, 1 -; PC64LE-NEXT: fmr 1, 30 -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; 
PC64LE-NEXT: xxmrghd 63, 1, 63 -; PC64LE-NEXT: fmr 1, 31 +; PC64LE-NEXT: xvrdpic 63, 0 ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: fmr 3, 1 ; PC64LE-NEXT: xxswapd 1, 63 -; PC64LE-NEXT: lfd 31, 72(1) # 8-byte Folded Reload ; PC64LE-NEXT: xxlor 2, 63, 63 -; PC64LE-NEXT: lfd 30, 64(1) # 8-byte Folded Reload ; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: addi 1, 1, 64 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -4644,30 +4593,19 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 { ; PC64LE9-LABEL: constrained_vector_nearby_v3f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: stdu 1, -64(1) -; PC64LE9-NEXT: std 0, 80(1) -; PC64LE9-NEXT: stfd 30, 48(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stdu 1, -48(1) +; PC64LE9-NEXT: xxmrghd 0, 2, 1 +; PC64LE9-NEXT: fmr 1, 3 +; PC64LE9-NEXT: std 0, 64(1) ; PC64LE9-NEXT: stxv 63, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: stfd 31, 56(1) # 8-byte Folded Spill -; PC64LE9-NEXT: fmr 31, 3 -; PC64LE9-NEXT: fmr 30, 2 -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscpsgndp 63, 1, 1 -; PC64LE9-NEXT: fmr 1, 30 -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xxmrghd 63, 1, 63 -; PC64LE9-NEXT: fmr 1, 31 +; PC64LE9-NEXT: xvrdpic 63, 0 ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: fmr 3, 1 ; PC64LE9-NEXT: xxswapd 1, 63 ; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr @@ -4682,78 +4620,14 @@ entry: define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 { ; PC64LE-LABEL: constrained_vector_nearbyint_v4f64: ; PC64LE: # 
%bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: stdu 1, -96(1) -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: std 0, 112(1) -; PC64LE-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: vmr 30, 2 -; PC64LE-NEXT: li 3, 80 -; PC64LE-NEXT: xxlor 1, 62, 62 -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: vmr 31, 3 -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: xxlor 61, 1, 1 -; PC64LE-NEXT: xxswapd 1, 62 -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: xxmrghd 62, 61, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: xxlor 61, 1, 1 -; PC64LE-NEXT: xxswapd 1, 63 -; PC64LE-NEXT: bl nearbyint -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 80 -; PC64LE-NEXT: vmr 2, 30 -; PC64LE-NEXT: xxmrghd 35, 61, 1 -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: lxvd2x 61, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addi 1, 1, 96 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: xvrdpic 35, 35 +; PC64LE-NEXT: xvrdpic 34, 34 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: stdu 1, -80(1) -; PC64LE9-NEXT: std 0, 96(1) -; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 30, 2 -; PC64LE9-NEXT: xscpsgndp 1, 62, 62 -; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: stxv 63, 64(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 31, 3 -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscpsgndp 61, 1, 1 -; PC64LE9-NEXT: xxswapd 1, 62 -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xxmrghd 62, 61, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; 
PC64LE9-NEXT: xscpsgndp 61, 1, 1 -; PC64LE9-NEXT: xxswapd 1, 63 -; PC64LE9-NEXT: bl nearbyint -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xxmrghd 35, 61, 1 -; PC64LE9-NEXT: vmr 2, 30 -; PC64LE9-NEXT: lxv 63, 64(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lxv 62, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lxv 61, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: addi 1, 1, 80 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: xvrdpic 35, 35 +; PC64LE9-NEXT: xvrdpic 34, 34 ; PC64LE9-NEXT: blr entry: %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64( diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll index 614f7b243c7e2..b8a21d741e81d 100644 --- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll @@ -4506,10 +4506,10 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(ptr %a) #0 { ; ; SZ13-LABEL: constrained_vector_nearbyint_v4f64: ; SZ13: # %bb.0: # %entry -; SZ13-NEXT: vl %v0, 16(%r2), 4 -; SZ13-NEXT: vl %v1, 0(%r2), 4 -; SZ13-NEXT: vfidb %v24, %v1, 4, 0 -; SZ13-NEXT: vfidb %v26, %v0, 4, 0 +; SZ13-NEXT: vl %v0, 0(%r2), 4 +; SZ13-NEXT: vl %v1, 16(%r2), 4 +; SZ13-NEXT: vfidb %v26, %v1, 4, 0 +; SZ13-NEXT: vfidb %v24, %v0, 4, 0 ; SZ13-NEXT: br %r14 entry: %b = load <4 x double>, ptr %a