Skip to content

Commit

Permalink
[LoopFusion] Move instructions from FC0.Latch to FC1.Latch.
Browse files Browse the repository at this point in the history
Summary: This PR moves instructions from FC0.Latch, bottom up, to the
beginning of FC1.Latch as long as they are proven safe.

To illustrate why this is beneficial, let's consider the following
example:
Before Fusion:
header1:
  br header2
header2:
  br header2, latch1
latch1:
  br header1, preheader3
preheader3:
  br header3
header3:
  br header4
header4:
  br header4, latch3
latch3:
  br header3, exit3

After Fusion (before this PR):
header1:
  br header2
header2:
  br header2, latch1
latch1:
  br header3
header3:
  br header4
header4:
  br header4, latch3
latch3:
  br header1, exit3

Note that preheader3 is removed during fusion before this PR.
Notice that we cannot fuse loop2 with loop4 as there exists block latch1
in between.
This PR moves instructions from latch1 to the beginning of latch3, and removes
block latch1. LoopFusion is now able to fuse loop nests recursively.

After Fusion (after this PR):
header1:
  br header2
header2:
  br header3
header3:
  br header4
header4:
  br header2, latch3
latch3:
  br header1, exit3

Reviewer: kbarton, jdoerfert, Meinersbur, dmgreen, fhahn, hfinkel,
bmahjour, etiotto
Reviewed By: kbarton, Meinersbur
Subscribers: hiraditya, llvm-commits
Tag: LLVM
Differential Revision: https://reviews.llvm.org/D71165
  • Loading branch information
Whitney Tsang committed Dec 17, 2019
1 parent 84161f1 commit 36bdc3d
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 47 deletions.
6 changes: 6 additions & 0 deletions llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
Expand Up @@ -45,6 +45,12 @@ bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
const DominatorTree &DT, const PostDominatorTree &PDT,
DependenceInfo &DI);

/// Move instructions from \p FromBB bottom up to the beginning of \p ToBB
/// when proven safe.
///
/// The instructions of \p FromBB are visited from the bottom of the block
/// upwards; the last instruction of \p FromBB is skipped and never moved.
/// Each visited instruction is placed in front of the first non-PHI,
/// non-debug instruction of \p ToBB when isSafeToMoveBefore() accepts that
/// position, and is left in \p FromBB otherwise.
///
/// \p DT, \p PDT and \p DI are forwarded unchanged to isSafeToMoveBefore().
void moveInstsBottomUp(BasicBlock &FromBB, BasicBlock &ToBB,
const DominatorTree &DT, const PostDominatorTree &PDT,
DependenceInfo &DI);

} // end namespace llvm

#endif // LLVM_TRANSFORMS_UTILS_CODEMOVERUTILS_H
43 changes: 43 additions & 0 deletions llvm/lib/Transforms/Scalar/LoopFuse.cpp
Expand Up @@ -1114,6 +1114,29 @@ struct LoopFuser {
return FC.ExitBlock->size() == 1;
}

/// Replace the condition of the latch branch of \p FC with the constant true.
///
/// Meant to be called once both successors of that branch are the same block,
/// which is checked by an assertion. Nothing is done when the latch terminator
/// is not a BranchInst.
void simplifyLatchBranch(const FusionCandidate &FC) const {
  auto *LatchBr = dyn_cast<BranchInst>(FC.Latch->getTerminator());
  if (!LatchBr)
    return;

  assert(LatchBr->isConditional() &&
         LatchBr->getSuccessor(0) == LatchBr->getSuccessor(1) &&
         "Expecting the two successors of FCLatchBranch to be the same");

  // Build the `true` constant with the same type as the existing condition.
  auto *CondTy = LatchBr->getCondition()->getType();
  LatchBr->setCondition(llvm::ConstantInt::getTrue(CondTy));
}

/// Move instructions from FC0.Latch to FC1.Latch. If FC0.Latch has an unique
/// successor, then merge FC0.Latch with its unique successor.
void mergeLatch(const FusionCandidate &FC0, const FusionCandidate &FC1) {
moveInstsBottomUp(*FC0.Latch, *FC1.Latch, DT, PDT, DI);
if (BasicBlock *Succ = FC0.Latch->getUniqueSuccessor()) {
MergeBlockIntoPredecessor(Succ, &DTU, &LI);
DTU.flush();
}
}

/// Fuse two fusion candidates, creating a new fused loop.
///
/// This method contains the mechanics of fusing two loops, represented by \p
Expand Down Expand Up @@ -1247,6 +1270,10 @@ struct LoopFuser {
FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);

// Change the condition of FC0 latch branch to true, as both successors of
// the branch are the same.
simplifyLatchBranch(FC0);

// If FC0.Latch and FC0.ExitingBlock are the same then we have already
// performed the updates above.
if (FC0.Latch != FC0.ExitingBlock)
Expand All @@ -1269,9 +1296,15 @@ struct LoopFuser {

// Is there a way to keep SE up-to-date so we don't need to forget the loops
// and rebuild the information in subsequent passes of fusion?
// Note: Need to forget the loops before merging the loop latches, as
// mergeLatch may remove the only block in FC1.
SE.forgetLoop(FC1.L);
SE.forgetLoop(FC0.L);

// Move instructions from FC0.Latch to FC1.Latch.
// Note: mergeLatch requires an updated DT.
mergeLatch(FC0, FC1);

// Merge the loops.
SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
FC1.L->block_end());
Expand Down Expand Up @@ -1491,6 +1524,10 @@ struct LoopFuser {
FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);

// Change the condition of FC0 latch branch to true, as both successors of
// the branch are the same.
simplifyLatchBranch(FC0);

// If FC0.Latch and FC0.ExitingBlock are the same then we have already
// performed the updates above.
if (FC0.Latch != FC0.ExitingBlock)
Expand Down Expand Up @@ -1522,9 +1559,15 @@ struct LoopFuser {

// Is there a way to keep SE up-to-date so we don't need to forget the loops
// and rebuild the information in subsequent passes of fusion?
// Note: Need to forget the loops before merging the loop latches, as
// mergeLatch may remove the only block in FC1.
SE.forgetLoop(FC1.L);
SE.forgetLoop(FC0.L);

// Move instructions from FC0.Latch to FC1.Latch.
// Note: mergeLatch requires an updated DT.
mergeLatch(FC0, FC1);

// Merge the loops.
SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
FC1.L->block_end());
Expand Down
20 changes: 17 additions & 3 deletions llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
Expand Up @@ -117,9 +117,9 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
if (MoveForward) {
// When I is being moved forward, we need to make sure the InsertPoint
// dominates every user. Otherwise, a user may be using an undefined I.
for (const Value *User : I.users())
if (auto *UserInst = dyn_cast<Instruction>(User))
if (!DT.dominates(&InsertPoint, UserInst))
for (const Use &U : I.uses())
if (auto *UserInst = dyn_cast<Instruction>(U.getUser()))
if (UserInst != &InsertPoint && !DT.dominates(&InsertPoint, U))
return false;
} else {
// When I is being moved backward, we need to make sure all its operands
Expand Down Expand Up @@ -173,3 +173,17 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,

return true;
}

/// Walk \p FromBB from the bottom up, skipping its last instruction, and move
/// every instruction that isSafeToMoveBefore() accepts in front of the first
/// non-PHI, non-debug instruction of \p ToBB. Instructions that are rejected
/// stay where they are.
void llvm::moveInstsBottomUp(BasicBlock &FromBB, BasicBlock &ToBB,
                             const DominatorTree &DT,
                             const PostDominatorTree &PDT, DependenceInfo &DI) {
  // Begin at the instruction just above the last one in FromBB.
  auto Cursor = FromBB.rbegin();
  ++Cursor;

  while (Cursor != FromBB.rend()) {
    Instruction &Candidate = *Cursor;
    // Step past Candidate first: moving it takes it out of FromBB.
    ++Cursor;

    // Recomputed on every iteration, because a previous move changes which
    // instruction comes first in ToBB.
    Instruction *InsertBefore = ToBB.getFirstNonPHIOrDbg();
    if (!isSafeToMoveBefore(Candidate, *InsertBefore, DT, PDT, DI))
      continue;

    Candidate.moveBefore(InsertBefore);
  }
}
16 changes: 5 additions & 11 deletions llvm/test/Transforms/LoopFusion/four_loops.ll
Expand Up @@ -9,20 +9,14 @@
; CHECK-NEXT: bb:
; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]+]]
; CHECK: [[LOOP1HEADER]]
; CHECK: br label %[[LOOP1LATCH:bb[0-9]+]]
; CHECK: [[LOOP1LATCH]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]+]], label %[[LOOP2BODY]]
; CHECK: br label %[[LOOP2BODY:bb[0-9]+]]
; CHECK: [[LOOP2BODY]]
; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
; CHECK: [[LOOP2LATCH]]
; CHECK: br i1 %{{.*}}, label %[[LOOP3BODY:bb[0-9]+]], label %[[LOOP3BODY]]
; CHECK: br label %[[LOOP3BODY:bb[0-9]+]]
; CHECK: [[LOOP3BODY]]
; CHECK: br label %[[LOOP3LATCH:bb[0-9]+]]
; CHECK: [[LOOP3LATCH]]
; CHECK: br i1 %{{.*}}, label %[[LOOP4BODY:bb[0-9]+]], label %[[LOOP4BODY]]
; CHECK: br label %[[LOOP4BODY:bb[0-9]+]]
; CHECK: [[LOOP4BODY]]
; CHECK: br label %[[LOOP4LATCH:bb[0-9]+]]
; CHECK: [[LOOP4LATCH]]
; CHECK: br label %[[LOOP1LATCH:bb[0-9]+]]
; CHECK: [[LOOP1LATCH]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOPEXIT:bb[0-9]+]]
; CHECK: ret void
define void @dep_free() {
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/Transforms/LoopFusion/guarded.ll
Expand Up @@ -8,8 +8,6 @@
; CHECK: [[LOOP1PREHEADER]]
; CHECK-NEXT: br label %[[LOOP1BODY:bb[0-9]*]]
; CHECK: [[LOOP1BODY]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2BODY]]
; CHECK: [[LOOP2BODY]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY]], label %[[LOOP2EXIT:bb[0-9]+]]
; CHECK: [[LOOP2EXIT]]
; CHECK: br label %[[LOOP1SUCC]]
Expand Down
31 changes: 14 additions & 17 deletions llvm/test/Transforms/LoopFusion/loop_nest.ll
Expand Up @@ -25,19 +25,16 @@
; CHECK: [[LOOP1HEADER]]
; CHECK: br label %[[LOOP3HEADER:bb[0-9]+]]
; CHECK: [[LOOP3HEADER]]
; CHECK: br label %[[LOOP3LATCH:bb[0-9]+]]
; CHECK: [[LOOP3LATCH]]
; CHECK: br i1 %{{.*}}, label %[[LOOP3HEADER]], label %[[LOOP1LATCH:bb[0-9]+]]
; CHECK: [[LOOP1LATCH]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2PREHEADER:bb[0-9]+]], label %[[LOOP2PREHEADER]]
; CHECK: [[LOOP2PREHEADER]]
; CHECK: br label %[[LOOP2HEADER:bb[0-9]+]]
; CHECK: [[LOOP2HEADER]]
; CHECK: br label %[[LOOP4HEADER:bb[0-9]+]]
; CHECK: [[LOOP4HEADER]]
; CHECK: br label %[[LOOP4LATCH:bb[0-9]+]]
; CHECK: [[LOOP4LATCH]]
; CHECK: br i1 %{{.*}}, label %[[LOOP4HEADER]], label %[[LOOP2LATCH:bb[0-9]+]]
; CHECK: [[LOOP2LATCH]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOP1EXIT:bb[0-9]*]]
; CHECK: br i1 %{{.*}}, label %[[LOOP3HEADER]], label %[[LOOP1LATCH:bb[0-9]+]]
; CHECK: [[LOOP1LATCH]]
; CHECK-NEXT: %inc.outer.fc0 = add nuw nsw i64 %indvars.iv105, 1
; CHECK-NEXT: %add.outer.fc0 = add nuw nsw i32 %.06, 1
; CHECK-NEXT: %cmp.outer.fc0 = icmp ne i64 %inc.outer.fc0, 100
; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOP1EXIT:bb[0-9]*]]
; CHECK: ret void

; TODO: The current version of loop fusion does not allow the inner loops to be
Expand All @@ -48,8 +45,8 @@ bb:
br label %bb16

bb16: ; preds = %bb, %bb27
%.06 = phi i32 [ 0, %bb ], [ %tmp28, %bb27 ]
%indvars.iv105 = phi i64 [ 0, %bb ], [ %indvars.iv.next11, %bb27 ]
%.06 = phi i32 [ 0, %bb ], [ %add.outer.fc0, %bb27 ]
%indvars.iv105 = phi i64 [ 0, %bb ], [ %inc.outer.fc0, %bb27 ]
br label %bb18

bb30: ; preds = %bb27
Expand All @@ -73,10 +70,10 @@ bb25: ; preds = %bb18
br i1 %exitcond9, label %bb18, label %bb27

bb27: ; preds = %bb25
%indvars.iv.next11 = add nuw nsw i64 %indvars.iv105, 1
%tmp28 = add nuw nsw i32 %.06, 1
%exitcond12 = icmp ne i64 %indvars.iv.next11, 100
br i1 %exitcond12, label %bb16, label %bb30
%inc.outer.fc0 = add nuw nsw i64 %indvars.iv105, 1
%add.outer.fc0 = add nuw nsw i32 %.06, 1
%cmp.outer.fc0 = icmp ne i64 %inc.outer.fc0, 100
br i1 %cmp.outer.fc0, label %bb16, label %bb30

bb33: ; preds = %bb30, %bb45
%.023 = phi i32 [ 0, %bb30 ], [ %tmp46, %bb45 ]
Expand Down
68 changes: 54 additions & 14 deletions llvm/test/Transforms/LoopFusion/simple.ll
Expand Up @@ -6,9 +6,7 @@
; CHECK-NEXT: bb:
; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]]
; CHECK: [[LOOP1HEADER]]
; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
; CHECK: [[LOOP1LATCH]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]]
; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]]
; CHECK: [[LOOP2HEADER]]
; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
; CHECK: [[LOOP2LATCH]]
Expand Down Expand Up @@ -72,9 +70,7 @@ bb29: ; preds = %bb18
; CHECK: [[LOOP1PREHEADER]]
; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]]
; CHECK: [[LOOP1HEADER]]
; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
; CHECK: [[LOOP1LATCH]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]*]], label %[[LOOP2HEADER]]
; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]]
; CHECK: [[LOOP2HEADER]]
; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
; CHECK: [[LOOP2LATCH]]
Expand Down Expand Up @@ -129,9 +125,7 @@ bb27: ; preds = %bb17
; CHECK-NEXT: bb:
; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]]
; CHECK: [[LOOP1HEADER]]
; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
; CHECK: [[LOOP1LATCH]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]]
; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]]
; CHECK: [[LOOP2HEADER]]
; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
; CHECK: [[LOOP2LATCH]]
Expand Down Expand Up @@ -179,8 +173,6 @@ bb19: ; preds = %bb18
; CHECK: [[LOOP1PREHEADER]]
; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]]
; CHECK: [[LOOP1HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]*]], label %[[LOOP2HEADER]]
; CHECK: [[LOOP2HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[EXITBLOCK]]
; CHECK: ret void
define void @raw_only_parametric(i32* noalias %arg, i32 %arg4) {
Expand Down Expand Up @@ -217,9 +209,7 @@ bb23: ; preds = %bb17, %bb
; CHECK-NEXT: bb:
; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]]
; CHECK: [[LOOP1HEADER]]
; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]]
; CHECK: [[LOOP1LATCH]]
; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]]
; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]]
; CHECK: [[LOOP2HEADER]]
; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]]
; CHECK: [[LOOP2LATCH]]
Expand Down Expand Up @@ -266,3 +256,53 @@ bb25: ; preds = %bb19
bb26: ; preds = %bb25
ret void
}

; Test that instructions in loop 1 latch are moved to the beginning of loop 2
; latch iff it is proven safe. %inc.first and %cmp.first are moved, but
; `store i32 0, i32* %Ai.first` is not.

; CHECK: void @flow_dep
; CHECK-LABEL: entry:
; CHECK-NEXT: br label %for.first
; CHECK-LABEL: for.first:
; CHECK: store i32 0, i32* %Ai.first
; CHECK: %Ai.second =
; CHECK: br label %for.second.latch
; CHECK-LABEL: for.second.latch:
; CHECK-NEXT: %inc.first = add nsw i64 %i.first, 1
; CHECK-NEXT: %cmp.first = icmp slt i64 %inc.first, 100
; CHECK: br i1 %cmp.second, label %for.first, label %for.end
; CHECK-LABEL: for.end:
; CHECK-NEXT: ret void

; Two adjacent loops that each count from 0 up to 100. The first stores 0 to
; A[i]; the second loads A[i] and stores the loaded value to B[i].
define void @flow_dep(i32* noalias %A, i32* noalias %B) {
entry:
br label %for.first

; First loop: a single block that branches back to itself, so the store and
; the induction update / exit compare all live in the same block.
for.first:
%i.first = phi i64 [ 0, %entry ], [ %inc.first, %for.first ]
%Ai.first = getelementptr inbounds i32, i32* %A, i64 %i.first
store i32 0, i32* %Ai.first, align 4
%inc.first = add nsw i64 %i.first, 1
%cmp.first = icmp slt i64 %inc.first, 100
br i1 %cmp.first, label %for.first, label %for.second.preheader

for.second.preheader:
br label %for.second

; Second loop body: reads the element of %A at index %i.second and writes it
; to the same index of %B.
for.second:
%i.second = phi i64 [ %inc.second, %for.second.latch ], [ 0, %for.second.preheader ]
%Ai.second = getelementptr inbounds i32, i32* %A, i64 %i.second
%0 = load i32, i32* %Ai.second, align 4
%Bi = getelementptr inbounds i32, i32* %B, i64 %i.second
store i32 %0, i32* %Bi, align 4
br label %for.second.latch

; Second loop latch: induction update and exit test in a separate block.
for.second.latch:
%inc.second = add nsw i64 %i.second, 1
%cmp.second = icmp slt i64 %inc.second, 100
br i1 %cmp.second, label %for.second, label %for.end

for.end:
ret void
}

0 comments on commit 36bdc3d

Please sign in to comment.