198 changes: 177 additions & 21 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
Expand Down Expand Up @@ -170,6 +171,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
case ISD::STRICT_FP_EXTEND:
R = ScalarizeVecRes_StrictFPOp(N);
break;
case ISD::UADDO:
Expand Down Expand Up @@ -321,6 +323,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
NewVT, Op, N->getOperand(1));
}

/// Scalarize the vector result of a STRICT_FP_ROUND node.
///
/// Builds a scalar STRICT_FP_ROUND whose value type is the element type of
/// N's result, fed by the scalarized form of N's vector input, and redirects
/// every user of N's chain result to the chain of the new node.
///
/// \param N the STRICT_FP_ROUND node; operand 0 is the incoming chain,
///          operand 1 the vector input, operand 2 the extra FP_ROUND argument.
/// \returns the scalar result (value 0) of the new node.
SDValue DAGTypeLegalizer::ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N) {
  EVT ScalarVT = N->getValueType(0).getVectorElementType();
  SDValue ScalarIn = GetScalarizedVector(N->getOperand(1));

  SDValue InChain = N->getOperand(0);
  SDValue RoundArg = N->getOperand(2);
  SDValue Rounded = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
                                { ScalarVT, MVT::Other },
                                { InChain, ScalarIn, RoundArg });

  // The old node's chain (result 1) is going away; anything hanging off it
  // must now hang off the chain produced by the scalar node.
  ReplaceValueWith(SDValue(N, 1), Rounded.getValue(1));
  return Rounded;
}

SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
SDValue Op = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::FPOWI, SDLoc(N),
Expand Down Expand Up @@ -604,6 +618,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE:
Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
case ISD::STRICT_FP_ROUND:
Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo);
break;
case ISD::FP_ROUND:
Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
break;
Expand Down Expand Up @@ -752,6 +769,20 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
}

/// Scalarize the vector operand of a STRICT_FP_ROUND node.
///
/// The rounding is performed on the scalarized input with a scalar
/// STRICT_FP_ROUND, users of N's chain result are redirected to the new
/// node's chain, and the scalar value is wrapped back into N's original
/// result type with SCALAR_TO_VECTOR.
///
/// \param N    the STRICT_FP_ROUND node being legalized.
/// \param OpNo index of the operand to scalarize; must be 1 (the vector
///             input that follows the chain operand).
/// \returns a SCALAR_TO_VECTOR node of N's result type.
SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N,
                                                         unsigned OpNo) {
  assert(OpNo == 1 && "Wrong operand for scalarization!");

  SDLoc dl(N);
  EVT ResVT = N->getValueType(0);
  SDValue ScalarIn = GetScalarizedVector(N->getOperand(1));

  SDValue Rounded = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                                { ResVT.getVectorElementType(), MVT::Other },
                                { N->getOperand(0), ScalarIn,
                                  N->getOperand(2) });

  // Result 1 of the old node is its chain; hand its users the chain of the
  // scalar replacement instead.
  ReplaceValueWith(SDValue(N, 1), Rounded.getValue(1));

  return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ResVT, Rounded);
}

SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
SDValue Res = GetScalarizedVector(N->getOperand(0));
// Result type may be wider than element type.
Expand Down Expand Up @@ -844,7 +875,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FNEARBYINT:
case ISD::FNEG:
case ISD::FP_EXTEND:
case ISD::STRICT_FP_EXTEND:
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
Expand Down Expand Up @@ -1615,15 +1648,34 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,

// If the input also splits, handle it directly for a compile time speedup.
// Otherwise split it by hand.
EVT InVT = N->getOperand(0).getValueType();
unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
EVT InVT = N->getOperand(OpNo).getValueType();
if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
GetSplitVector(N->getOperand(0), Lo, Hi);
GetSplitVector(N->getOperand(OpNo), Lo, Hi);
else
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo);

if (N->getOpcode() == ISD::FP_ROUND) {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
} else if (N->getOpcode() == ISD::STRICT_FP_ROUND) {
Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other },
{ N->getOperand(0), Lo, N->getOperand(2) });
Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other },
{ N->getOperand(0), Hi, N->getOperand(2) });
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Lo.getValue(1), Hi.getValue(1));
ReplaceValueWith(SDValue(N, 1), NewChain);
} else if (N->isStrictFPOpcode()) {
Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other },
{ N->getOperand(0), Lo });
Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other },
{ N->getOperand(0), Hi });
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Lo.getValue(1), Hi.getValue(1));
ReplaceValueWith(SDValue(N, 1), NewChain);
} else {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
Expand Down Expand Up @@ -1824,6 +1876,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::TRUNCATE:
Res = SplitVecOp_TruncateHelper(N);
break;
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
case ISD::STORE:
Expand Down Expand Up @@ -1853,6 +1906,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP:
case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
Expand Down Expand Up @@ -1894,7 +1948,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
if (Res.getNode() == N)
return true;

assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
if (N->isStrictFPOpcode())
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
"Invalid operand expansion");
else
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
"Invalid operand expansion");

ReplaceValueWith(SDValue(N, 0), Res);
Expand Down Expand Up @@ -1982,14 +2040,30 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
SDLoc dl(N);
GetSplitVector(N->getOperand(0), Lo, Hi);
GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
EVT InVT = Lo.getValueType();

EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());

Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
if (N->isStrictFPOpcode()) {
Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
{ N->getOperand(0), Lo });
Hi = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
{ N->getOperand(0), Hi });

// Build a factor node to remember that this operation is independent
// of the other one.
SDValue Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));

// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Ch);
} else {
Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
}

return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
Expand Down Expand Up @@ -2461,14 +2535,26 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
SDLoc DL(N);
GetSplitVector(N->getOperand(0), Lo, Hi);
GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
EVT InVT = Lo.getValueType();

EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());

Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
if (N->isStrictFPOpcode()) {
Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
{ N->getOperand(0), Lo, N->getOperand(2) });
Hi = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
{ N->getOperand(0), Hi, N->getOperand(2) });
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
Lo.getValue(1), Hi.getValue(1));
ReplaceValueWith(SDValue(N, 1), NewChain);
} else {
Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
}

return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}
Expand Down Expand Up @@ -2632,6 +2718,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Convert(N);
break;

case ISD::STRICT_FP_EXTEND:
case ISD::STRICT_FP_ROUND:
Res = WidenVecRes_Convert_StrictFP(N);
break;

case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
Expand Down Expand Up @@ -3109,6 +3200,43 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getBuildVector(WidenVT, DL, Ops);
}

/// Widen the vector result of a strict FP conversion (dispatched for
/// STRICT_FP_EXTEND and STRICT_FP_ROUND).
///
/// No vector-level strategy is implemented yet, so the node is always
/// unrolled: each of the original (un-widened) elements is extracted,
/// converted with a scalar copy of the same opcode that reuses N's remaining
/// operands, and placed in a build vector of the widened type. Lanes beyond
/// the original element count are left undef. The per-element output chains
/// are joined with a TokenFactor that replaces N's chain result.
///
/// \param N the strict conversion node; operand 0 is the chain and operand 1
///          the vector input.
/// \returns a build vector of the widened result type.
SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
  SDValue InOp = N->getOperand(1);
  SDLoc DL(N);
  // Start from N's own operand list so the chain and any trailing operands
  // are carried over unchanged; only slot 1 is rewritten per element.
  SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());

  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
  unsigned WidenNumElts = WidenVT.getVectorNumElements();

  EVT InVT = InOp.getValueType();
  EVT InEltVT = InVT.getVectorElementType();

  unsigned Opcode = N->getOpcode();

  // FIXME: Optimizations need to be implemented here.

  // Otherwise unroll into some nasty scalar code and rebuild the vector.
  EVT EltVT = WidenVT.getVectorElementType();
  SmallVector<EVT, 2> EltVTs = { EltVT, MVT::Other };
  SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
  SmallVector<SDValue, 32> OpChains;
  // Use the original element count so we don't do more scalar ops than
  // necessary.
  unsigned MinElts = N->getValueType(0).getVectorNumElements();
  // The index type does not depend on the element being extracted.
  EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
  for (unsigned i = 0; i < MinElts; ++i) {
    NewOps[1] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
                            DAG.getConstant(i, DL, IdxVT));
    Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps);
    OpChains.push_back(Ops[i].getValue(1));
  }
  // Every scalar op hangs off the same input chain, so merge their output
  // chains and let that stand in for the chain result of the original node.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OpChains);
  ReplaceValueWith(SDValue(N, 1), NewChain);

  return DAG.getBuildVector(WidenVT, DL, Ops);
}

SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
unsigned Opcode = N->getOpcode();
SDValue InOp = N->getOperand(0);
Expand Down Expand Up @@ -3895,6 +4023,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
break;

case ISD::FP_EXTEND:
case ISD::STRICT_FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
Expand Down Expand Up @@ -3929,8 +4058,12 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
return true;


assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
"Invalid operand expansion");
if (N->isStrictFPOpcode())
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
"Invalid operand expansion");
else
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
"Invalid operand expansion");

ReplaceValueWith(SDValue(N, 0), Res);
return false;
Expand Down Expand Up @@ -4010,7 +4143,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
EVT EltVT = VT.getVectorElementType();
SDLoc dl(N);
unsigned NumElts = VT.getVectorNumElements();
SDValue InOp = N->getOperand(0);
SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
assert(getTypeAction(InOp.getValueType()) ==
TargetLowering::TypeWidenVector &&
"Unexpected type action");
Expand All @@ -4019,10 +4152,19 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
unsigned Opcode = N->getOpcode();

// See if a widened result type would be legal, if so widen the node.
// FIXME: This isn't safe for StrictFP. Other optimization here is needed.
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
InVT.getVectorNumElements());
if (TLI.isTypeLegal(WideVT)) {
SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp);
if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
SDValue Res;
if (N->isStrictFPOpcode()) {
Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
{ N->getOperand(0), InOp });
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
} else
Res = DAG.getNode(Opcode, dl, WideVT, InOp);
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
Expand All @@ -4032,12 +4174,26 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {

// Unroll the convert into some scalar code and create a nasty build vector.
SmallVector<SDValue, 16> Ops(NumElts);
for (unsigned i=0; i < NumElts; ++i)
Ops[i] = DAG.getNode(
Opcode, dl, EltVT,
DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
if (N->isStrictFPOpcode()) {
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
SmallVector<SDValue, 32> OpChains;
for (unsigned i=0; i < NumElts; ++i) {
NewOps[1] = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps);
OpChains.push_back(Ops[i].getValue(1));
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
ReplaceValueWith(SDValue(N, 1), NewChain);
} else {
for (unsigned i = 0; i < NumElts; ++i)
Ops[i] = DAG.getNode(
Opcode, dl, EltVT,
DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
}

return DAG.getBuildVector(VT, dl, Ops);
}
Expand Down
17 changes: 16 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7611,15 +7611,30 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break;
case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; IsUnary = true; break;
case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break;
// STRICT_FP_ROUND takes an extra argument describing whether or not
// the value will be changed by this node. See ISDOpcodes.h for details.
case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break;
case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; IsUnary = true; break;
}

// We're taking this node out of the chain, so we need to re-link things.
SDValue InputChain = Node->getOperand(0);
SDValue OutputChain = SDValue(Node, 1);
ReplaceAllUsesOfValueWith(OutputChain, InputChain);

SDVTList VTs = getVTList(Node->getOperand(1).getValueType());
SDVTList VTs;
SDNode *Res = nullptr;

switch (OrigOpc) {
default:
VTs = getVTList(Node->getOperand(1).getValueType());
break;
case ISD::STRICT_FP_ROUND:
case ISD::STRICT_FP_EXTEND:
VTs = getVTList(Node->getValueType(0));
break;
}

if (IsUnary)
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
else if (IsTernary)
Expand Down
15 changes: 14 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6078,6 +6078,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
Expand Down Expand Up @@ -6834,6 +6836,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_fma:
Opcode = ISD::STRICT_FMA;
break;
case Intrinsic::experimental_constrained_fptrunc:
Opcode = ISD::STRICT_FP_ROUND;
break;
case Intrinsic::experimental_constrained_fpext:
Opcode = ISD::STRICT_FP_EXTEND;
break;
case Intrinsic::experimental_constrained_sqrt:
Opcode = ISD::STRICT_FSQRT;
break;
Expand Down Expand Up @@ -6897,7 +6905,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(

SDVTList VTs = DAG.getVTList(ValueVTs);
SDValue Result;
if (FPI.isUnaryOp())
if (Opcode == ISD::STRICT_FP_ROUND)
Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)),
DAG.getTargetConstant(0, sdl,
TLI.getPointerTy(DAG.getDataLayout())) });
else if (FPI.isUnaryOp())
Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)) });
else if (FPI.isTernaryOp())
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,9 +313,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg";
case ISD::TRUNCATE: return "truncate";
case ISD::FP_ROUND: return "fp_round";
case ISD::STRICT_FP_ROUND: return "strict_fp_round";
case ISD::FLT_ROUNDS_: return "flt_rounds";
case ISD::FP_ROUND_INREG: return "fp_round_inreg";
case ISD::FP_EXTEND: return "fp_extend";
case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";

case ISD::SINT_TO_FP: return "sint_to_fp";
case ISD::UINT_TO_FP: return "uint_to_fp";
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/IR/IntrinsicInst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const {
switch (getIntrinsicID()) {
default:
return false;
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_sin:
case Intrinsic::experimental_constrained_cos:
Expand Down
43 changes: 43 additions & 0 deletions llvm/lib/IR/Verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4209,6 +4209,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
Expand Down Expand Up @@ -4687,6 +4689,47 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
HasRoundingMD = true;
break;

case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext: {
if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) {
Assert((NumOperands == 3),
"invalid arguments for constrained FP intrinsic", &FPI);
HasRoundingMD = true;
} else {
Assert((NumOperands == 2),
"invalid arguments for constrained FP intrinsic", &FPI);
}
HasExceptionMD = true;

Value *Operand = FPI.getArgOperand(0);
Type *OperandTy = Operand->getType();
Value *Result = &FPI;
Type *ResultTy = Result->getType();
Assert(OperandTy->isFPOrFPVectorTy(),
"Intrinsic first argument must be FP or FP vector", &FPI);
Assert(ResultTy->isFPOrFPVectorTy(),
"Intrinsic result must be FP or FP vector", &FPI);
Assert(OperandTy->isVectorTy() == ResultTy->isVectorTy(),
"Intrinsic first argument and result disagree on vector use", &FPI);
if (OperandTy->isVectorTy()) {
auto *OperandVecTy = cast<VectorType>(OperandTy);
auto *ResultVecTy = cast<VectorType>(ResultTy);
Assert(OperandVecTy->getNumElements() == ResultVecTy->getNumElements(),
"Intrinsic first argument and result vector lengths must be equal",
&FPI);
}
if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) {
Assert(OperandTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits(),
"Intrinsic first argument's type must be larger than result type",
&FPI);
} else {
Assert(OperandTy->getScalarSizeInBits() < ResultTy->getScalarSizeInBits(),
"Intrinsic first argument's type must be smaller than result type",
&FPI);
}
}
break;

default:
llvm_unreachable("Invalid constrained FP intrinsic!");
}
Expand Down
26 changes: 26 additions & 0 deletions llvm/test/CodeGen/X86/fp-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,29 @@ entry:
ret double %rem
}

; Verify that fptrunc(42.1) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
; CHECK-LABEL: @f21
; COMMON: cvtsd2ss
define float @f21() {
entry:
%result = call float @llvm.experimental.constrained.fptrunc.f32.f64(
double 42.1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %result
}

; Verify that a constrained fpext of a float argument to double is lowered
; to a cvtss2sd conversion.
; CHECK-LABEL: @f22
; COMMON: cvtss2sd
define double @f22(float %x) {
entry:
%result = call double @llvm.experimental.constrained.fpext.f64.f32(float %x,
metadata !"fpexcept.strict")
ret double %result
}

@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
Expand All @@ -306,3 +329,6 @@ declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadat
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)

219 changes: 219 additions & 0 deletions llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3831,6 +3831,217 @@ entry:
ret <4 x double> %min
}

; Constrained fptrunc of a constant <1 x double> to <1 x float> with dynamic
; rounding and strict exceptions. Both sets of checks expect one scalar
; double-to-single conversion.
define <1 x float> @constrained_vector_fptrunc_v1f64() {
; CHECK-LABEL: constrained_vector_fptrunc_v1f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fptrunc_v1f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(
<1 x double><double 42.1>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <1 x float> %result
}

; Constrained fptrunc of a constant <2 x double> to <2 x float>. Both sets of
; checks expect two scalar conversions whose results are then merged into
; one register.
define <2 x float> @constrained_vector_fptrunc_v2f64() {
; CHECK-LABEL: constrained_vector_fptrunc_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fptrunc_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: retq
entry:
%result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
<2 x double><double 42.1, double 42.2>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x float> %result
}

; Constrained fptrunc of a constant <3 x double> to <3 x float>. Both sets of
; checks expect exactly three scalar conversions, one per source element.
define <3 x float> @constrained_vector_fptrunc_v3f64() {
; CHECK-LABEL: constrained_vector_fptrunc_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm1, %xmm1
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fptrunc_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX-NEXT: retq
entry:
%result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
<3 x double><double 42.1, double 42.2,
double 42.3>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <3 x float> %result
}

; Constrained fptrunc of a constant <4 x double> to <4 x float>. The non-AVX
; checks expect four scalar conversions; the AVX checks expect a single
; packed vcvtpd2psy taking its source from memory.
define <4 x float> @constrained_vector_fptrunc_v4f64() {
; CHECK-LABEL: constrained_vector_fptrunc_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm1, %xmm1
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm2
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fptrunc_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtpd2psy {{.*}}(%rip), %xmm0
; AVX-NEXT: retq
entry:
%result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(
<4 x double><double 42.1, double 42.2,
double 42.3, double 42.4>,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %result
}

; Constrained fpext of a constant <1 x float> to <1 x double> with strict
; exceptions. Both sets of checks expect one scalar single-to-double
; conversion.
define <1 x double> @constrained_vector_fpext_v1f32() {
; CHECK-LABEL: constrained_vector_fpext_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fpext_v1f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(
<1 x float><float 42.0>,
metadata !"fpexcept.strict")
ret <1 x double> %result
}

; Constrained fpext of a constant <2 x float> to <2 x double>. Both sets of
; checks expect two scalar conversions whose results are then merged into
; one register.
define <2 x double> @constrained_vector_fpext_v2f32() {
; CHECK-LABEL: constrained_vector_fpext_v2f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm0, %xmm1
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fpext_v2f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
entry:
%result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
<2 x float><float 42.0, float 43.0>,
metadata !"fpexcept.strict")
ret <2 x double> %result
}

; Constrained fpext of a constant <3 x float> to <3 x double>. Both sets of
; checks expect exactly three scalar conversions. In the non-AVX checks the
; third converted value is spilled to the stack and reloaded with fldl; in
; the AVX checks the three values are assembled into a ymm register.
define <3 x double> @constrained_vector_fpext_v3f32() {
; CHECK-LABEL: constrained_vector_fpext_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm1, %xmm1
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm2, %xmm2
; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fpext_v3f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
entry:
%result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(
<3 x float><float 42.0, float 43.0,
float 44.0>,
metadata !"fpexcept.strict")
ret <3 x double> %result
}

; Constrained fpext of a constant <4 x float> to <4 x double>. The non-AVX
; checks expect four scalar conversions with the result in xmm0 and xmm1;
; the AVX checks expect a single packed vcvtps2pd into ymm0 taking its source
; from memory.
define <4 x double> @constrained_vector_fpext_v4f32() {
; CHECK-LABEL: constrained_vector_fpext_v4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm0, %xmm1
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm1, %xmm2
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: cvtss2sd %xmm1, %xmm1
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_fpext_v4f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtps2pd {{.*}}(%rip), %ymm0
; AVX-NEXT: retq
entry:
%result = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
<4 x float><float 42.0, float 43.0,
float 44.0, float 45.0>,
metadata !"fpexcept.strict")
ret <4 x double> %result
}

define <1 x float> @constrained_vector_ceil_v1f32() {
; CHECK-LABEL: constrained_vector_ceil_v1f32:
; CHECK: # %bb.0: # %entry
Expand Down Expand Up @@ -4413,6 +4624,8 @@ declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, met
declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
Expand All @@ -4438,6 +4651,8 @@ declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metad
declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata)
declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata)
declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata, metadata)
declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata, metadata)
Expand Down Expand Up @@ -4482,6 +4697,8 @@ declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3
declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata)
declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata, metadata)
Expand Down Expand Up @@ -4511,6 +4728,8 @@ declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, met
declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata, metadata)
Expand Down
26 changes: 26 additions & 0 deletions llvm/test/Feature/fp-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,30 @@ entry:
ret double %result
}

; Verify that fptrunc(42.1) isn't simplified when the rounding mode is
; unknown (the call passes round.dynamic together with fpexcept.strict).
; The check only requires that the intrinsic call is still present.
; CHECK-LABEL: f20
; CHECK: call float @llvm.experimental.constrained.fptrunc
define float @f20() {
entry:
%result = call float @llvm.experimental.constrained.fptrunc.f32.f64(
double 42.1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %result
}

; Verify that fpext(42.0) isn't simplified when exception behavior is
; strict. Unlike fptrunc, the constrained fpext intrinsic takes no
; rounding-mode operand, only the exception-behavior metadata.
; CHECK-LABEL: f21
; CHECK: call double @llvm.experimental.constrained.fpext
define double @f21() {
entry:
%result = call double @llvm.experimental.constrained.fpext.f64.f32(float 42.0,
metadata !"fpexcept.strict")
ret double %result
}

@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
Expand All @@ -260,3 +284,5 @@ declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadat
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)