diff --git a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h index 0d2ea8c6ff7f8..32eb7cc2ab045 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h +++ b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h @@ -45,6 +45,12 @@ bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint, const DominatorTree &DT, const PostDominatorTree &PDT, DependenceInfo &DI); +/// Move instructions from \p FromBB bottom up to the beginning of \p ToBB +/// when proven safe. The last instruction of \p FromBB is never moved. +void moveInstsBottomUp(BasicBlock &FromBB, BasicBlock &ToBB, + const DominatorTree &DT, const PostDominatorTree &PDT, + DependenceInfo &DI); + } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_CODEMOVERUTILS_H diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp index e5ea9149dce8f..a7f4242853fcd 100644 --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -1114,6 +1114,29 @@ struct LoopFuser { return FC.ExitBlock->size() == 1; } + /// Simplify the condition of the latch branch of \p FC to true, when both of + /// its successors are the same. + void simplifyLatchBranch(const FusionCandidate &FC) const { + BranchInst *FCLatchBranch = dyn_cast<BranchInst>(FC.Latch->getTerminator()); + if (FCLatchBranch) { + assert(FCLatchBranch->isConditional() && + FCLatchBranch->getSuccessor(0) == FCLatchBranch->getSuccessor(1) && + "Expecting the two successors of FCLatchBranch to be the same"); + FCLatchBranch->setCondition( + llvm::ConstantInt::getTrue(FCLatchBranch->getCondition()->getType())); + } + } + + /// Move instructions from FC0.Latch to FC1.Latch. If FC0.Latch has a unique + /// successor, then merge FC0.Latch with its unique successor. 
+ void mergeLatch(const FusionCandidate &FC0, const FusionCandidate &FC1) { + moveInstsBottomUp(*FC0.Latch, *FC1.Latch, DT, PDT, DI); + if (BasicBlock *Succ = FC0.Latch->getUniqueSuccessor()) { + MergeBlockIntoPredecessor(Succ, &DTU, &LI); + DTU.flush(); + } + } + /// Fuse two fusion candidates, creating a new fused loop. /// /// This method contains the mechanics of fusing two loops, represented by \p @@ -1247,6 +1270,10 @@ struct LoopFuser { FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header); FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header); + // Change the condition of FC0 latch branch to true, as both successors of + // the branch are the same. + simplifyLatchBranch(FC0); + // If FC0.Latch and FC0.ExitingBlock are the same then we have already // performed the updates above. if (FC0.Latch != FC0.ExitingBlock) @@ -1269,9 +1296,15 @@ struct LoopFuser { // Is there a way to keep SE up-to-date so we don't need to forget the loops // and rebuild the information in subsequent passes of fusion? + // Note: Need to forget the loops before merging the loop latches, as + // mergeLatch may remove the only block in FC1. SE.forgetLoop(FC1.L); SE.forgetLoop(FC0.L); + // Move instructions from FC0.Latch to FC1.Latch. + // Note: mergeLatch requires an updated DT. + mergeLatch(FC0, FC1); + // Merge the loops. SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(), FC1.L->block_end()); @@ -1491,6 +1524,10 @@ struct LoopFuser { FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header); FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header); + // Change the condition of FC0 latch branch to true, as both successors of + // the branch are the same. + simplifyLatchBranch(FC0); + // If FC0.Latch and FC0.ExitingBlock are the same then we have already // performed the updates above. 
if (FC0.Latch != FC0.ExitingBlock) @@ -1522,9 +1559,15 @@ struct LoopFuser { // Is there a way to keep SE up-to-date so we don't need to forget the loops // and rebuild the information in subsequent passes of fusion? + // Note: Need to forget the loops before merging the loop latches, as + // mergeLatch may remove the only block in FC1. SE.forgetLoop(FC1.L); SE.forgetLoop(FC0.L); + // Move instructions from FC0.Latch to FC1.Latch. + // Note: mergeLatch requires an updated DT. + mergeLatch(FC0, FC1); + // Merge the loops. SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(), FC1.L->block_end()); diff --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp index 7a3a8adfea833..93395ac761ab5 100644 --- a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp +++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp @@ -117,9 +117,9 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint, if (MoveForward) { // When I is being moved forward, we need to make sure the InsertPoint // dominates every users. Or else, a user may be using an undefined I. - for (const Value *User : I.users()) - if (auto *UserInst = dyn_cast<Instruction>(User)) - if (!DT.dominates(&InsertPoint, UserInst)) + for (const Use &U : I.uses()) + if (auto *UserInst = dyn_cast<Instruction>(U.getUser())) + if (UserInst != &InsertPoint && !DT.dominates(&InsertPoint, U)) return false; } else { // When I is being moved backward, we need to make sure all its opernads @@ -173,3 +173,17 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint, return true; } + +void llvm::moveInstsBottomUp(BasicBlock &FromBB, BasicBlock &ToBB, + const DominatorTree &DT, + const PostDominatorTree &PDT, DependenceInfo &DI) { + for (auto It = ++FromBB.rbegin(); It != FromBB.rend();) { + Instruction *MovePos = ToBB.getFirstNonPHIOrDbg(); + Instruction &I = *It; + // Increment the iterator before modifying FromBB. 
+ ++It; + + if (isSafeToMoveBefore(I, *MovePos, DT, PDT, DI)) + I.moveBefore(MovePos); + } +} diff --git a/llvm/test/Transforms/LoopFusion/four_loops.ll b/llvm/test/Transforms/LoopFusion/four_loops.ll index 771e92813f6bb..8f3822b8a9422 100644 --- a/llvm/test/Transforms/LoopFusion/four_loops.ll +++ b/llvm/test/Transforms/LoopFusion/four_loops.ll @@ -9,20 +9,14 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]+]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]+]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]+]], label %[[LOOP2BODY]] +; CHECK: br label %[[LOOP2BODY:bb[0-9]+]] ; CHECK: [[LOOP2BODY]] -; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] -; CHECK: [[LOOP2LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP3BODY:bb[0-9]+]], label %[[LOOP3BODY]] +; CHECK: br label %[[LOOP3BODY:bb[0-9]+]] ; CHECK: [[LOOP3BODY]] -; CHECK: br label %[[LOOP3LATCH:bb[0-9]+]] -; CHECK: [[LOOP3LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP4BODY:bb[0-9]+]], label %[[LOOP4BODY]] +; CHECK: br label %[[LOOP4BODY:bb[0-9]+]] ; CHECK: [[LOOP4BODY]] -; CHECK: br label %[[LOOP4LATCH:bb[0-9]+]] -; CHECK: [[LOOP4LATCH]] +; CHECK: br label %[[LOOP1LATCH:bb[0-9]+]] +; CHECK: [[LOOP1LATCH]] ; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOPEXIT:bb[0-9]+]] ; CHECK: ret void define void @dep_free() { diff --git a/llvm/test/Transforms/LoopFusion/guarded.ll b/llvm/test/Transforms/LoopFusion/guarded.ll index 1a131178d25e5..9242e0a518e91 100644 --- a/llvm/test/Transforms/LoopFusion/guarded.ll +++ b/llvm/test/Transforms/LoopFusion/guarded.ll @@ -8,8 +8,6 @@ ; CHECK: [[LOOP1PREHEADER]] ; CHECK-NEXT: br label %[[LOOP1BODY:bb[0-9]*]] ; CHECK: [[LOOP1BODY]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2BODY]] -; CHECK: [[LOOP2BODY]] ; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY]], label %[[LOOP2EXIT:bb[0-9]+]] ; CHECK: [[LOOP2EXIT]] ; CHECK: br label %[[LOOP1SUCC]] diff --git a/llvm/test/Transforms/LoopFusion/loop_nest.ll 
b/llvm/test/Transforms/LoopFusion/loop_nest.ll index 8445bedce3fcd..44a0ac8093da9 100644 --- a/llvm/test/Transforms/LoopFusion/loop_nest.ll +++ b/llvm/test/Transforms/LoopFusion/loop_nest.ll @@ -25,19 +25,16 @@ ; CHECK: [[LOOP1HEADER]] ; CHECK: br label %[[LOOP3HEADER:bb[0-9]+]] ; CHECK: [[LOOP3HEADER]] -; CHECK: br label %[[LOOP3LATCH:bb[0-9]+]] -; CHECK: [[LOOP3LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP3HEADER]], label %[[LOOP1LATCH:bb[0-9]+]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2PREHEADER:bb[0-9]+]], label %[[LOOP2PREHEADER]] -; CHECK: [[LOOP2PREHEADER]] +; CHECK: br label %[[LOOP2HEADER:bb[0-9]+]] +; CHECK: [[LOOP2HEADER]] ; CHECK: br label %[[LOOP4HEADER:bb[0-9]+]] ; CHECK: [[LOOP4HEADER]] -; CHECK: br label %[[LOOP4LATCH:bb[0-9]+]] -; CHECK: [[LOOP4LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP4HEADER]], label %[[LOOP2LATCH:bb[0-9]+]] -; CHECK: [[LOOP2LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOP1EXIT:bb[0-9]*]] +; CHECK: br i1 %{{.*}}, label %[[LOOP3HEADER]], label %[[LOOP1LATCH:bb[0-9]+]] +; CHECK: [[LOOP1LATCH]] +; CHECK-NEXT: %inc.outer.fc0 = add nuw nsw i64 %indvars.iv105, 1 +; CHECK-NEXT: %add.outer.fc0 = add nuw nsw i32 %.06, 1 +; CHECK-NEXT: %cmp.outer.fc0 = icmp ne i64 %inc.outer.fc0, 100 +; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOP1EXIT:bb[0-9]*]] ; CHECK: ret void ; TODO: The current version of loop fusion does not allow the inner loops to be @@ -48,8 +45,8 @@ bb: br label %bb16 bb16: ; preds = %bb, %bb27 - %.06 = phi i32 [ 0, %bb ], [ %tmp28, %bb27 ] - %indvars.iv105 = phi i64 [ 0, %bb ], [ %indvars.iv.next11, %bb27 ] + %.06 = phi i32 [ 0, %bb ], [ %add.outer.fc0, %bb27 ] + %indvars.iv105 = phi i64 [ 0, %bb ], [ %inc.outer.fc0, %bb27 ] br label %bb18 bb30: ; preds = %bb27 @@ -73,10 +70,10 @@ bb25: ; preds = %bb18 br i1 %exitcond9, label %bb18, label %bb27 bb27: ; preds = %bb25 - %indvars.iv.next11 = add nuw nsw i64 %indvars.iv105, 1 - %tmp28 = add nuw nsw i32 %.06, 1 - 
%exitcond12 = icmp ne i64 %indvars.iv.next11, 100 - br i1 %exitcond12, label %bb16, label %bb30 + %inc.outer.fc0 = add nuw nsw i64 %indvars.iv105, 1 + %add.outer.fc0 = add nuw nsw i32 %.06, 1 + %cmp.outer.fc0 = icmp ne i64 %inc.outer.fc0, 100 + br i1 %cmp.outer.fc0, label %bb16, label %bb30 bb33: ; preds = %bb30, %bb45 %.023 = phi i32 [ 0, %bb30 ], [ %tmp46, %bb45 ] diff --git a/llvm/test/Transforms/LoopFusion/simple.ll b/llvm/test/Transforms/LoopFusion/simple.ll index dc7d8d089eab6..aeb626126d0b1 100644 --- a/llvm/test/Transforms/LoopFusion/simple.ll +++ b/llvm/test/Transforms/LoopFusion/simple.ll @@ -6,9 +6,7 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]] +; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]] ; CHECK: [[LOOP2HEADER]] ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] ; CHECK: [[LOOP2LATCH]] @@ -72,9 +70,7 @@ bb29: ; preds = %bb18 ; CHECK: [[LOOP1PREHEADER]] ; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]*]], label %[[LOOP2HEADER]] +; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]] ; CHECK: [[LOOP2HEADER]] ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] ; CHECK: [[LOOP2LATCH]] @@ -129,9 +125,7 @@ bb27: ; preds = %bb17 ; CHECK-NEXT: bb: ; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]] +; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]] ; CHECK: [[LOOP2HEADER]] ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] ; CHECK: [[LOOP2LATCH]] @@ -179,8 +173,6 @@ bb19: ; preds = %bb18 ; CHECK: [[LOOP1PREHEADER]] ; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br 
i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]*]], label %[[LOOP2HEADER]] -; CHECK: [[LOOP2HEADER]] ; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[EXITBLOCK]] ; CHECK: ret void define void @raw_only_parametric(i32* noalias %arg, i32 %arg4) { @@ -217,9 +209,7 @@ bb23: ; preds = %bb17, %bb ; CHECK-NEXT: bb: ; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]] +; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]] ; CHECK: [[LOOP2HEADER]] ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] ; CHECK: [[LOOP2LATCH]] @@ -266,3 +256,53 @@ bb25: ; preds = %bb19 bb26: ; preds = %bb25 ret void } + +; Test that instructions in loop 1 latch are moved to the beginning of loop 2 +; latch iff it is proven safe. %inc.first and %cmp.first are moved, but +; `store i32 0, i32* %Ai.first` is not. + +; CHECK: void @flow_dep +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %for.first +; CHECK-LABEL: for.first: +; CHECK: store i32 0, i32* %Ai.first +; CHECK: %Ai.second = +; CHECK: br label %for.second.latch +; CHECK-LABEL: for.second.latch: +; CHECK-NEXT: %inc.first = add nsw i64 %i.first, 1 +; CHECK-NEXT: %cmp.first = icmp slt i64 %inc.first, 100 +; CHECK: br i1 %cmp.second, label %for.first, label %for.end +; CHECK-LABEL: for.end: +; CHECK-NEXT: ret void + +define void @flow_dep(i32* noalias %A, i32* noalias %B) { +entry: + br label %for.first + +for.first: + %i.first = phi i64 [ 0, %entry ], [ %inc.first, %for.first ] + %Ai.first = getelementptr inbounds i32, i32* %A, i64 %i.first + store i32 0, i32* %Ai.first, align 4 + %inc.first = add nsw i64 %i.first, 1 + %cmp.first = icmp slt i64 %inc.first, 100 + br i1 %cmp.first, label %for.first, label %for.second.preheader + +for.second.preheader: + br label %for.second + +for.second: + %i.second = phi i64 [ %inc.second, %for.second.latch ], [ 0, %for.second.preheader ] + %Ai.second = 
getelementptr inbounds i32, i32* %A, i64 %i.second + %0 = load i32, i32* %Ai.second, align 4 + %Bi = getelementptr inbounds i32, i32* %B, i64 %i.second + store i32 %0, i32* %Bi, align 4 + br label %for.second.latch + +for.second.latch: + %inc.second = add nsw i64 %i.second, 1 + %cmp.second = icmp slt i64 %inc.second, 100 + br i1 %cmp.second, label %for.second, label %for.end + +for.end: + ret void +}