47 changes: 47 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5336,6 +5336,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
return nullptr;
case Intrinsic::experimental_constrained_fadd:
case Intrinsic::experimental_constrained_fsub:
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
visitConstrainedFPIntrinsic(I, Intrinsic);
return nullptr;
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
Expand Down Expand Up @@ -5784,6 +5791,46 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
}

void SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I,
unsigned Intrinsic) {
SDLoc sdl = getCurSDLoc();
unsigned Opcode;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::experimental_constrained_fadd:
Opcode = ISD::STRICT_FADD;
break;
case Intrinsic::experimental_constrained_fsub:
Opcode = ISD::STRICT_FSUB;
break;
case Intrinsic::experimental_constrained_fmul:
Opcode = ISD::STRICT_FMUL;
break;
case Intrinsic::experimental_constrained_fdiv:
Opcode = ISD::STRICT_FDIV;
break;
case Intrinsic::experimental_constrained_frem:
Opcode = ISD::STRICT_FREM;
break;
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Chain = getRoot();
SDValue Ops[3] = { Chain, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)) };
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
ValueVTs.push_back(MVT::Other); // Out chain

SDVTList VTs = DAG.getVTList(ValueVTs);
SDValue Result = DAG.getNode(Opcode, sdl, VTs, Ops);

assert(Result.getNode()->getNumValues() == 2);
SDValue OutChain = Result.getValue(1);
DAG.setRoot(OutChain);
SDValue FPResult = Result.getValue(0);
setValue(&I, FPResult);
}

std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
const BasicBlock *EHPadBB) {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,7 @@ class SelectionDAGBuilder {
void visitInlineAsm(ImmutableCallSite CS);
const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const CallInst &I, unsigned Intrinsic);

void visitVAStart(const CallInst &I);
void visitVAArg(const VAArgInst &I);
Expand Down
60 changes: 60 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,50 @@ class ISelUpdater : public SelectionDAG::DAGUpdateListener {
};
} // end anonymous namespace

/// Report whether \p Node is one of the STRICT_F* pseudo-operations.
///
/// On a match, \p NewOpc is set to the corresponding ordinary FP opcode and
/// true is returned. Otherwise \p NewOpc is left untouched and false is
/// returned.
static bool isStrictFPOp(SDNode *Node, unsigned &NewOpc) {
  switch (Node->getOpcode()) {
  case ISD::STRICT_FADD:
    NewOpc = ISD::FADD;
    break;
  case ISD::STRICT_FSUB:
    NewOpc = ISD::FSUB;
    break;
  case ISD::STRICT_FMUL:
    NewOpc = ISD::FMUL;
    break;
  case ISD::STRICT_FDIV:
    NewOpc = ISD::FDIV;
    break;
  case ISD::STRICT_FREM:
    NewOpc = ISD::FREM;
    break;
  default:
    // Not a strict FP pseudo-op.
    return false;
  }
  return true;
}

/// Turn a STRICT_F* node into its ordinary FP counterpart.
///
/// \p NewOpc must be the non-strict opcode that pairs with the strict opcode
/// of \p Node (asserted). Users of the node's chain result are redirected to
/// its input chain, then the node is morphed to \p NewOpc with only its two
/// FP operands and a single result type.
///
/// \returns the node produced by MorphNodeTo: \p Node itself if it was
/// updated in place, otherwise the node MorphNodeTo handed back.
SDNode* SelectionDAGISel::MutateStrictFPToFP(SDNode *Node, unsigned NewOpc) {
  assert(((Node->getOpcode() == ISD::STRICT_FADD && NewOpc == ISD::FADD) ||
          (Node->getOpcode() == ISD::STRICT_FSUB && NewOpc == ISD::FSUB) ||
          (Node->getOpcode() == ISD::STRICT_FMUL && NewOpc == ISD::FMUL) ||
          (Node->getOpcode() == ISD::STRICT_FDIV && NewOpc == ISD::FDIV) ||
          (Node->getOpcode() == ISD::STRICT_FREM && NewOpc == ISD::FREM)) &&
         "Unexpected StrictFP opcode!");

  // Splice the node out of the chain: consumers of its chain result
  // (value #1) are rewired to its incoming chain (operand #0).
  SDValue ChainIn = Node->getOperand(0);
  SDValue ChainOut(Node, 1);
  CurDAG->ReplaceAllUsesOfValueWith(ChainOut, ChainIn);

  // The morphed node produces one value, typed like the first FP operand,
  // and keeps only the two FP operands.
  SDVTList ResultVT = CurDAG->getVTList(Node->getOperand(1).getValueType());
  SDValue FPOps[2] = { Node->getOperand(1), Node->getOperand(2) };
  SDNode *Morphed = CurDAG->MorphNodeTo(Node, NewOpc, ResultVT, FPOps);

  // MorphNodeTo either hands back a pre-existing node with the requested
  // operands, or rewrites Node in place and returns it.
  if (Morphed != Node) {
    // A different node came back: move all users over and drop the old one.
    CurDAG->ReplaceAllUsesWith(Node, Morphed);
    CurDAG->RemoveDeadNode(Node);
    return Morphed;
  }

  // Updated in place: reset the node ID so that, to the isel, this looks
  // just like a newly allocated machine node.
  Morphed->setNodeId(-1);
  return Morphed;
}

void SelectionDAGISel::DoInstructionSelection() {
DEBUG(dbgs() << "===== Instruction selection begins: BB#"
<< FuncInfo->MBB->getNumber()
Expand Down Expand Up @@ -960,7 +1004,23 @@ void SelectionDAGISel::DoInstructionSelection() {
if (Node->use_empty())
continue;

// When we are using non-default rounding modes or FP exception behavior
// FP operations are represented by StrictFP pseudo-operations. They
// need to be simplified here so that the target-specific instruction
// selectors know how to handle them.
//
// If the current node is a strict FP pseudo-op, the isStrictFPOp()
// function will provide the corresponding normal FP opcode to which the
// node should be mutated.
unsigned NormalFPOpc = ISD::UNDEF;
bool IsStrictFPOp = isStrictFPOp(Node, NormalFPOpc);
if (IsStrictFPOp)
Node = MutateStrictFPToFP(Node, NormalFPOpc);

Select(Node);

// FIXME: Add code here to attach an implicit def and use of
// target-specific FP environment registers.
}

CurDAG->setRoot(Dummy.getValue());
Expand Down
32 changes: 32 additions & 0 deletions llvm/lib/IR/IntrinsicInst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
Expand Down Expand Up @@ -93,3 +94,34 @@ Value *InstrProfIncrementInst::getStep() const {
LLVMContext &Context = M->getContext();
return ConstantInt::get(Type::getInt64Ty(Context), 1);
}

/// Decode the rounding-mode argument (operand 2) of this intrinsic call.
///
/// \returns the RoundingMode named by the operand's metadata string, or
/// rmInvalid if the operand is not a MetadataAsValue wrapping an MDString, or
/// if the string is not one of the recognized "round.*" names.
ConstrainedFPIntrinsic::RoundingMode
ConstrainedFPIntrinsic::getRoundingMode() const {
  // dyn_cast<> yields null for an operand of the wrong kind, so it must be
  // checked before being dereferenced.
  const MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(getOperand(2));
  if (!MAV)
    return rmInvalid;

  Metadata *MD = MAV->getMetadata();
  if (!MD || !isa<MDString>(MD))
    return rmInvalid;
  StringRef RoundingArg = cast<MDString>(MD)->getString();

  // For dynamic rounding mode, we use round to nearest but we will set the
  // 'exact' SDNodeFlag so that the value will not be rounded.
  // NOTE(review): nothing in this function does that; it only maps the
  // string to rmDynamic. Presumably this describes a consumer -- confirm.
  return StringSwitch<RoundingMode>(RoundingArg)
      .Case("round.dynamic", rmDynamic)
      .Case("round.tonearest", rmToNearest)
      .Case("round.downward", rmDownward)
      .Case("round.upward", rmUpward)
      .Case("round.towardzero", rmTowardZero)
      .Default(rmInvalid);
}

/// Decode the exception-behavior argument (operand 3) of this intrinsic call.
///
/// \returns the ExceptionBehavior named by the operand's metadata string, or
/// ebInvalid if the operand is not a MetadataAsValue wrapping an MDString, or
/// if the string is not one of the recognized "fpexcept.*" names.
ConstrainedFPIntrinsic::ExceptionBehavior
ConstrainedFPIntrinsic::getExceptionBehavior() const {
  // dyn_cast<> yields null for an operand of the wrong kind, so it must be
  // checked before being dereferenced.
  const MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(getOperand(3));
  if (!MAV)
    return ebInvalid;

  Metadata *MD = MAV->getMetadata();
  if (!MD || !isa<MDString>(MD))
    return ebInvalid;
  StringRef ExceptionArg = cast<MDString>(MD)->getString();

  return StringSwitch<ExceptionBehavior>(ExceptionArg)
      .Case("fpexcept.ignore", ebIgnore)
      .Case("fpexcept.maytrap", ebMayTrap)
      .Case("fpexcept.strict", ebStrict)
      .Default(ebInvalid);
}
18 changes: 18 additions & 0 deletions llvm/lib/IR/Verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,7 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
void visitUserOp1(Instruction &I);
void visitUserOp2(Instruction &I) { visitUserOp1(I); }
void visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS);
void visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI);
template <class DbgIntrinsicTy>
void visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII);
void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI);
Expand Down Expand Up @@ -3929,6 +3930,14 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
"constant int",
CS);
break;
case Intrinsic::experimental_constrained_fadd:
case Intrinsic::experimental_constrained_fsub:
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
visitConstrainedFPIntrinsic(
cast<ConstrainedFPIntrinsic>(*CS.getInstruction()));
break;
case Intrinsic::dbg_declare: // llvm.dbg.declare
Assert(isa<MetadataAsValue>(CS.getArgOperand(0)),
"invalid llvm.dbg.declare intrinsic call 1", CS);
Expand Down Expand Up @@ -4294,6 +4303,15 @@ static DISubprogram *getSubprogram(Metadata *LocalScope) {
return nullptr;
}

/// Verify the metadata arguments of a constrained FP intrinsic call.
///
/// Operand 2 must be a MetadataAsValue that decodes to a valid rounding mode,
/// and operand 3 must be a MetadataAsValue that decodes to a valid exception
/// behavior.
void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
  // Check the operand kind before asking the intrinsic to decode it.
  Assert(isa<MetadataAsValue>(FPI.getOperand(2)),
         "invalid rounding mode argument", &FPI);
  Assert(FPI.getRoundingMode() != ConstrainedFPIntrinsic::rmInvalid,
         "invalid rounding mode argument", &FPI);

  // Same guard for the exception-behavior operand, mirroring operand 2.
  Assert(isa<MetadataAsValue>(FPI.getOperand(3)),
         "invalid exception behavior argument", &FPI);
  Assert(FPI.getExceptionBehavior() != ConstrainedFPIntrinsic::ebInvalid,
         "invalid exception behavior argument", &FPI);
}

template <class DbgIntrinsicTy>
void Verifier::visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII) {
auto *MD = cast<MetadataAsValue>(DII.getArgOperand(0))->getMetadata();
Expand Down
111 changes: 111 additions & 0 deletions llvm/test/CodeGen/X86/fp-intrinsics.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s

; Verify that constants aren't folded to inexact results when the rounding mode
; is unknown.
;
; double f1() {
; // Because 0.1 cannot be represented exactly, this shouldn't be folded.
; return 1.0/10.0;
; }
;
; CHECK-LABEL: f1
; CHECK: divsd
define double @f1() {
entry:
  ; Constrained 1.0 / 10.0 with dynamic rounding and strict FP exceptions.
  %quot = call double @llvm.experimental.constrained.fdiv.f64(
              double 1.000000e+00, double 1.000000e+01,
              metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret double %quot
}

; Verify that 'a - 0' isn't simplified to 'a' when the rounding mode is unknown.
;
; double f2(double a) {
; // Because the result of '0 - 0' is negative zero if rounding mode is
; // downward, this shouldn't be simplified.
; return a - 0;
; }
;
; CHECK-LABEL: f2
; CHECK: subsd
define double @f2(double %a) {
entry:
  ; Constrained a - 0.0 with dynamic rounding and strict FP exceptions.
  %diff = call double @llvm.experimental.constrained.fsub.f64(
              double %a, double 0.000000e+00,
              metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret double %diff
}

; Verify that '-((-a)*b)' isn't simplified to 'a*b' when the rounding mode is
; unknown.
;
; double f3(double a, double b) {
; // Because the intermediate value involved in this calculation may require
; // rounding, this shouldn't be simplified.
; return -((-a)*b);
; }
;
; CHECK-LABEL: f3:
; CHECK: subsd
; CHECK: mulsd
; CHECK: subsd
define double @f3(double %a, double %b) {
entry:
  ; -a, written as a constrained (-0.0 - a).
  %neg.a = call double @llvm.experimental.constrained.fsub.f64(
               double -0.000000e+00, double %a,
               metadata !"round.dynamic", metadata !"fpexcept.strict")
  ; (-a) * b
  %prod = call double @llvm.experimental.constrained.fmul.f64(
               double %neg.a, double %b,
               metadata !"round.dynamic", metadata !"fpexcept.strict")
  ; -((-a) * b), again as a constrained (-0.0 - x).
  %neg.prod = call double @llvm.experimental.constrained.fsub.f64(
               double -0.000000e+00, double %prod,
               metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret double %neg.prod
}

; Verify that FP operations are not performed speculatively when FP exceptions
; are not being ignored.
;
; double f4(int n, double a) {
; // Because a + 1 may overflow, this should not be simplified.
; if (n > 0)
; return a + 1.0;
; return a;
; }
;
;
; CHECK-LABEL: f4:
; CHECK: testl
; CHECK: jle
; CHECK: addsd
define double @f4(i32 %n, double %a) {
entry:
  %is.pos = icmp sgt i32 %n, 0
  br i1 %is.pos, label %do.add, label %done

do.add:
  ; The constrained add sits only on the n > 0 path.
  %sum = call double @llvm.experimental.constrained.fadd.f64(
             double 1.000000e+00, double %a,
             metadata !"round.dynamic", metadata !"fpexcept.strict")
  br label %done

done:
  %res = phi double [ %sum, %do.add ], [ %a, %entry ]
  ret double %res
}


@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
102 changes: 102 additions & 0 deletions llvm/test/Feature/fp-intrinsics.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
; RUN: opt -O3 -S < %s | FileCheck %s

; Test to verify that constants aren't folded when the rounding mode is unknown.
; CHECK-LABEL: @f1
; CHECK: call double @llvm.experimental.constrained.fdiv.f64
define double @f1() {
entry:
  ; Constrained 1.0 / 10.0 with dynamic rounding and strict FP exceptions.
  %quot = call double @llvm.experimental.constrained.fdiv.f64(
              double 1.000000e+00, double 1.000000e+01,
              metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret double %quot
}

; Verify that 'a - 0' isn't simplified to 'a' when the rounding mode is unknown.
;
; double f2(double a) {
; // Because the result of '0 - 0' is negative zero if rounding mode is
; // downward, this shouldn't be simplified.
; return a - 0.0;
; }
;
; CHECK-LABEL: @f2
; CHECK: call double @llvm.experimental.constrained.fsub.f64
define double @f2(double %a) {
entry:
  ; Constrained a - 0.0 with dynamic rounding and strict FP exceptions.
  %diff = call double @llvm.experimental.constrained.fsub.f64(
              double %a,
              double 0.000000e+00,
              metadata !"round.dynamic",
              metadata !"fpexcept.strict")
  ret double %diff
}

; Verify that '-((-a)*b)' isn't simplified to 'a*b' when the rounding mode is
; unknown.
;
; double f3(double a, double b) {
; // Because the intermediate value involved in this calculation may require
; // rounding, this shouldn't be simplified.
; return -((-a)*b);
; }
;
; CHECK-LABEL: @f3
; CHECK: call double @llvm.experimental.constrained.fsub.f64
; CHECK: call double @llvm.experimental.constrained.fmul.f64
; CHECK: call double @llvm.experimental.constrained.fsub.f64
define double @f3(double %a, double %b) {
entry:
  ; -a, written as a constrained (-0.0 - a).
  %neg.a = call double @llvm.experimental.constrained.fsub.f64(
               double -0.000000e+00, double %a,
               metadata !"round.dynamic", metadata !"fpexcept.strict")
  ; (-a) * b
  %prod = call double @llvm.experimental.constrained.fmul.f64(
               double %neg.a, double %b,
               metadata !"round.dynamic", metadata !"fpexcept.strict")
  ; -((-a) * b), again as a constrained (-0.0 - x).
  %neg.prod = call double @llvm.experimental.constrained.fsub.f64(
               double -0.000000e+00, double %prod,
               metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret double %neg.prod
}

; Verify that FP operations are not performed speculatively when FP exceptions
; are not being ignored.
;
; double f4(int n, double a) {
; // Because a + 1 may overflow, this should not be simplified.
; if (n > 0)
; return a + 1.0;
; return a;
; }
;
;
; CHECK-LABEL: @f4
; CHECK-NOT: select
; CHECK: br i1 %cmp
define double @f4(i32 %n, double %a) {
entry:
  ; The name %cmp is matched by this test's expectations; keep it.
  %cmp = icmp sgt i32 %n, 0
  br i1 %cmp, label %do.add, label %done

do.add:
  ; The constrained add sits only on the n > 0 path.
  %sum = call double @llvm.experimental.constrained.fadd.f64(
             double 1.000000e+00, double %a,
             metadata !"round.dynamic", metadata !"fpexcept.strict")
  br label %done

done:
  %res = phi double [ %sum, %do.add ], [ %a, %entry ]
  ret double %res
}


@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
43 changes: 43 additions & 0 deletions llvm/test/Verifier/fp-intrinsics.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
; RUN: opt -verify -S < %s 2>&1 | FileCheck --check-prefix=CHECK1 %s
; RUN: sed -e s/.T2:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK2 %s
; RUN: sed -e s/.T3:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK3 %s

; Common declaration used for all runs.
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)

; Test that the verifier accepts legal code, and that the correct attributes are
; attached to the FP intrinsic.
; CHECK1: declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) #[[ATTR:[0-9]+]]
; CHECK1: attributes #[[ATTR]] = { inaccessiblememonly nounwind }
; Note: FP exceptions aren't usually caught through normal unwind mechanisms,
; but we may want to revisit this for asynchronous exception handling.
define double @f1(double %a, double %b) {
entry:
  ; Well-formed call: both metadata arguments use recognized strings.
  %sum = call double @llvm.experimental.constrained.fadd.f64(
             double %a,
             double %b,
             metadata !"round.dynamic",
             metadata !"fpexcept.strict")
  ret double %sum
}

; Test an illegal value for the rounding mode argument.
; CHECK2: invalid rounding mode argument
;T2: define double @f2(double %a, double %b) {
;T2: entry:
;T2: %fadd = call double @llvm.experimental.constrained.fadd.f64(
;T2: double %a, double %b,
;T2: metadata !"round.dynomite",
;T2: metadata !"fpexcept.strict")
;T2: ret double %fadd
;T2: }

; Test an illegal value for the exception behavior argument.
; CHECK3: invalid exception behavior argument
;T3: define double @f2(double %a, double %b) {
;T3: entry:
;T3: %fadd = call double @llvm.experimental.constrained.fadd.f64(
;T3: double %a, double %b,
;T3: metadata !"round.dynamic",
;T3: metadata !"fpexcept.restrict")
;T3: ret double %fadd
;T3: }