Skip to content

Commit

Permalink
[FPEnv] Fix chain handling regression after 04a8696
Browse files Browse the repository at this point in the history
Code in getRoot made the assumption that every node in PendingLoads
must always itself have a dependency on the current DAG root node.

After the changes in 04a8696, it turns out that this assumption no
longer holds true, causing wrong codegen in some cases (e.g. stores
after constrained FP intrinsics might get deleted).

To fix this, we now need to make sure that the TokenFactor created
by getRoot always includes the previous root, if there is no implicit
dependency already present.

The original getControlRoot code already has exactly this check,
so this patch simply reuses that code now for getRoot as well.
This fixes the regression.

NFC if no constrained FP intrinsic is present.
  • Loading branch information
uweigand committed Jan 14, 2020
1 parent df18650 commit 81ee484
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 37 deletions.
62 changes: 28 additions & 34 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Expand Up @@ -1037,24 +1037,41 @@ void SelectionDAGBuilder::clearDanglingDebugInfo() {
DanglingDebugInfoMap.clear();
}

SDValue SelectionDAGBuilder::getMemoryRoot() {
if (PendingLoads.empty())
return DAG.getRoot();
// Update DAG root to include dependencies on Pending chains.
SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) {
SDValue Root = DAG.getRoot();

if (PendingLoads.size() == 1) {
SDValue Root = PendingLoads[0];
DAG.setRoot(Root);
PendingLoads.clear();
if (Pending.empty())
return Root;

// Add the current root to Pending, unless we already indirectly
// depend on it.
if (Root.getOpcode() != ISD::EntryToken) {
unsigned i = 0, e = Pending.size();
for (; i != e; ++i) {
assert(Pending[i].getNode()->getNumOperands() > 1);
if (Pending[i].getNode()->getOperand(0) == Root)
break; // Don't add the root if we already indirectly depend on it.
}

if (i == e)
Pending.push_back(Root);
}

// Otherwise, we have to make a token factor node.
SDValue Root = DAG.getTokenFactor(getCurSDLoc(), PendingLoads);
PendingLoads.clear();
if (Pending.size() == 1)
Root = Pending[0];
else
Root = DAG.getTokenFactor(getCurSDLoc(), Pending);

DAG.setRoot(Root);
Pending.clear();
return Root;
}

/// Return the root produced by running updateRoot over the PendingLoads
/// list. All of the work (merging the pending chains into the DAG root and
/// emptying the list) is done by updateRoot; this is only a thin wrapper.
SDValue SelectionDAGBuilder::getMemoryRoot() {
return updateRoot(PendingLoads);
}

SDValue SelectionDAGBuilder::getRoot() {
// Chain up all pending constrained intrinsics together with all
// pending loads, by simply appending them to PendingLoads and
Expand All @@ -1072,35 +1089,12 @@ SDValue SelectionDAGBuilder::getRoot() {
}

/// Return a root that also covers everything queued in PendingExports,
/// after first moving the pending fpexcept.strict constrained intrinsics
/// into that list.
///
/// NOTE(review): as rendered here the body contains both an inline
/// root-merging sequence and a trailing call to updateRoot(PendingExports).
/// The inline sequence always returns first, so the trailing call is never
/// reached. This looks like the removed and added sides of the change shown
/// together -- confirm against the real file which one is meant to remain.
SDValue SelectionDAGBuilder::getControlRoot() {
// Snapshot the current DAG root; it is returned unchanged when nothing is
// pending.
SDValue Root = DAG.getRoot();

// We need to emit pending fpexcept.strict constrained intrinsics,
// so append them to the PendingExports list.
PendingExports.append(PendingConstrainedFPStrict.begin(),
PendingConstrainedFPStrict.end());
PendingConstrainedFPStrict.clear();

// Nothing queued: the existing root is already the control root.
if (PendingExports.empty())
return Root;

// Turn all of the CopyToReg chains into one factored node.
// The current root is added to the list as well, except when it is the
// EntryToken or when some pending node already has it as operand 0.
if (Root.getOpcode() != ISD::EntryToken) {
unsigned i = 0, e = PendingExports.size();
for (; i != e; ++i) {
assert(PendingExports[i].getNode()->getNumOperands() > 1);
if (PendingExports[i].getNode()->getOperand(0) == Root)
break; // Don't add the root if we already indirectly depend on it.
}

// i == e means the loop ran to completion without finding the root.
if (i == e)
PendingExports.push_back(Root);
}

// Build a TokenFactor over the pending list, empty the list, and install
// the new node as the DAG root.
Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
PendingExports);
PendingExports.clear();
DAG.setRoot(Root);
return Root;
// NOTE(review): not reachable as written because of the return just above.
return updateRoot(PendingExports);
}

void SelectionDAGBuilder::visit(const Instruction &I) {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
Expand Up @@ -154,6 +154,9 @@ class SelectionDAGBuilder {
SmallVector<SDValue, 8> PendingConstrainedFP;
SmallVector<SDValue, 8> PendingConstrainedFPStrict;

/// Update root to include all chains from the Pending list.
SDValue updateRoot(SmallVectorImpl<SDValue> &Pending);

/// A unique monotonically increasing number used to order the SDNodes we
/// create.
unsigned SDNodeOrder;
Expand Down
64 changes: 61 additions & 3 deletions llvm/test/CodeGen/SystemZ/fp-strict-alias.ll
Expand Up @@ -5,6 +5,7 @@
declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
declare float @llvm.sqrt.f32(float)
declare void @llvm.s390.sfpc(i32)
declare void @bar()

; The basic assumption of all following tests is that on z13, we never
; want to see two square root instructions directly in a row, so the
Expand Down Expand Up @@ -300,7 +301,7 @@ define void @f13(float %f1) {
ret void
}

define void @f14(float %f1) {
define void @f14(float %f1) #0 {
; CHECK-LABEL: f14:
; CHECK-NOT: sqeb
; CHECK: br %r14
Expand All @@ -313,7 +314,7 @@ define void @f14(float %f1) {
ret void
}

define void @f15(float %f1) {
define void @f15(float %f1) #0 {
; CHECK-LABEL: f15:
; CHECK-NOT: sqeb
; CHECK: br %r14
Expand All @@ -326,7 +327,7 @@ define void @f15(float %f1) {
ret void
}

define void @f16(float %f1) {
define void @f16(float %f1) #0 {
; CHECK-LABEL: f16:
; CHECK: sqebr
; CHECK: br %r14
Expand All @@ -339,4 +340,61 @@ define void @f16(float %f1) {
ret void
}


; Verify that constrained intrinsics and memory operations get their
; chains linked up properly.

; f17 - baseline case using the plain (non-constrained) llvm.sqrt.f32
; intrinsic. The result is stored through %out and the function then
; tail-calls @bar. The expected output has the square root, then the
; store, then the jump to bar, in that order.
define void @f17(float %in, float* %out) #0 {
; CHECK-LABEL: f17:
; CHECK: sqebr
; CHECK: ste
; CHECK: jg bar
%sqrt = call float @llvm.sqrt.f32(float %in)
store float %sqrt, float* %out, align 4
tail call void @bar() #0
ret void
}

; f18 - same shape as the plain-sqrt case, but the square root is the
; constrained intrinsic with dynamic rounding and fpexcept.ignore. The
; store of the result must still appear between the square root and the
; tail call to @bar.
define void @f18(float %in, float* %out) #0 {
; CHECK-LABEL: f18:
; CHECK: sqebr
; CHECK: ste
; CHECK: jg bar
%sqrt = call float @llvm.experimental.constrained.sqrt.f32(
float %in,
metadata !"round.dynamic",
metadata !"fpexcept.ignore") #0
store float %sqrt, float* %out, align 4
tail call void @bar() #0
ret void
}

; f19 - constrained square root with dynamic rounding and
; fpexcept.maytrap, followed by a store of the result and a tail call to
; @bar. The expected output keeps all three, in that order.
define void @f19(float %in, float* %out) #0 {
; CHECK-LABEL: f19:
; CHECK: sqebr
; CHECK: ste
; CHECK: jg bar
%sqrt = call float @llvm.experimental.constrained.sqrt.f32(
float %in,
metadata !"round.dynamic",
metadata !"fpexcept.maytrap") #0
store float %sqrt, float* %out, align 4
tail call void @bar() #0
ret void
}

; f20 - constrained square root with dynamic rounding and
; fpexcept.strict, followed by a store of the result and a tail call to
; @bar. The expected output keeps the square root, the store and the jump
; to bar, in that order.
define void @f20(float %in, float* %out) #0 {
; CHECK-LABEL: f20:
; CHECK: sqebr
; CHECK: ste
; CHECK: jg bar
%sqrt = call float @llvm.experimental.constrained.sqrt.f32(
float %in,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
store float %sqrt, float* %out, align 4
tail call void @bar() #0
ret void
}

attributes #0 = { strictfp }

0 comments on commit 81ee484

Please sign in to comment.