Skip to content

Commit

Permalink
[LoopBoundSplit] Handle the case in which the exiting block is the loop header
Browse files Browse the repository at this point in the history
Correctly update the incoming values of the phi nodes in the header of the post-loop.

Differential Revision: https://reviews.llvm.org/D110060
  • Loading branch information
jaykang10 committed Oct 8, 2021
1 parent dd5991c commit 4c98070
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 48 deletions.
84 changes: 59 additions & 25 deletions llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
Expand Up @@ -40,6 +40,8 @@ struct ConditionInfo {
ICmpInst::Predicate Pred;
/// AddRec llvm value
Value *AddRecValue;
/// Non PHI AddRec llvm value
Value *NonPHIAddRecValue;
/// Bound llvm value
Value *BoundValue;
/// AddRec SCEV
Expand All @@ -55,7 +57,7 @@ struct ConditionInfo {
} // namespace

static void analyzeICmp(ScalarEvolution &SE, ICmpInst *ICmp,
ConditionInfo &Cond) {
ConditionInfo &Cond, const Loop &L) {
Cond.ICmp = ICmp;
if (match(ICmp, m_ICmp(Cond.Pred, m_Value(Cond.AddRecValue),
m_Value(Cond.BoundValue)))) {
Expand All @@ -72,6 +74,14 @@ static void analyzeICmp(ScalarEvolution &SE, ICmpInst *ICmp,

Cond.AddRecSCEV = dyn_cast<SCEVAddRecExpr>(AddRecSCEV);
Cond.BoundSCEV = BoundSCEV;
Cond.NonPHIAddRecValue = Cond.AddRecValue;

// If the Cond.AddRecValue is PHI node, update Cond.NonPHIAddRecValue with
// value from backedge.
if (Cond.AddRecSCEV && isa<PHINode>(Cond.AddRecValue)) {
PHINode *PN = cast<PHINode>(Cond.AddRecValue);
Cond.NonPHIAddRecValue = PN->getIncomingValueForBlock(L.getLoopLatch());
}
}
}

Expand Down Expand Up @@ -123,7 +133,7 @@ static bool calculateUpperBound(const Loop &L, ScalarEvolution &SE,
static bool hasProcessableCondition(const Loop &L, ScalarEvolution &SE,
ICmpInst *ICmp, ConditionInfo &Cond,
bool IsExitCond) {
analyzeICmp(SE, ICmp, Cond);
analyzeICmp(SE, ICmp, Cond, L);

// The BoundSCEV should be evaluated at loop entry.
if (!SE.isAvailableAtLoopEntry(Cond.BoundSCEV, &L))
Expand Down Expand Up @@ -353,13 +363,45 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
".split", &LI, &DT, PostLoopBlocks);
remapInstructionsInBlocks(PostLoopBlocks, VMap);

// Add conditional branch to check we can skip post-loop in its preheader.
BasicBlock *PostLoopPreHeader = PostLoop->getLoopPreheader();
IRBuilder<> Builder(PostLoopPreHeader);
IRBuilder<> Builder(&PostLoopPreHeader->front());

// Update phi nodes in header of post-loop.
bool isExitingLatch =
(L.getExitingBlock() == L.getLoopLatch()) ? true : false;
Value *ExitingCondLCSSAPhi = nullptr;
for (PHINode &PN : L.getHeader()->phis()) {
// Create LCSSA phi node in preheader of post-loop.
PHINode *LCSSAPhi =
Builder.CreatePHI(PN.getType(), 1, PN.getName() + ".lcssa");
LCSSAPhi->setDebugLoc(PN.getDebugLoc());
// If the exiting block is loop latch, the phi does not have the update at
// last iteration. In this case, update lcssa phi with value from backedge.
LCSSAPhi->addIncoming(
isExitingLatch ? PN.getIncomingValueForBlock(L.getLoopLatch()) : &PN,
L.getExitingBlock());

// Update the start value of phi node in post-loop with the LCSSA phi node.
PHINode *PostLoopPN = cast<PHINode>(VMap[&PN]);
PostLoopPN->setIncomingValueForBlock(PostLoopPreHeader, LCSSAPhi);

// Find PHI with exiting condition from pre-loop. The PHI should be
// SCEVAddRecExpr and have same incoming value from backedge with
// ExitingCond.
if (!SE.isSCEVable(PN.getType()))
continue;

const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
if (PhiSCEV && ExitingCond.NonPHIAddRecValue ==
PN.getIncomingValueForBlock(L.getLoopLatch()))
ExitingCondLCSSAPhi = LCSSAPhi;
}

// Add conditional branch to check we can skip post-loop in its preheader.
Instruction *OrigBI = PostLoopPreHeader->getTerminator();
ICmpInst::Predicate Pred = ICmpInst::ICMP_NE;
Value *Cond =
Builder.CreateICmp(Pred, ExitingCond.AddRecValue, ExitingCond.BoundValue);
Builder.CreateICmp(Pred, ExitingCondLCSSAPhi, ExitingCond.BoundValue);
Builder.CreateCondBr(Cond, PostLoop->getHeader(), PostLoop->getExitBlock());
OrigBI->eraseFromParent();

Expand All @@ -380,21 +422,6 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
// Replace exiting bound value of pre-loop NewBound.
ExitingCond.ICmp->setOperand(1, NewBoundValue);

// Replace IV's start value of post-loop by NewBound.
for (PHINode &PN : L.getHeader()->phis()) {
// Find PHI with exiting condition from pre-loop.
if (SE.isSCEVable(PN.getType()) && isa<SCEVAddRecExpr>(SE.getSCEV(&PN))) {
for (Value *Op : PN.incoming_values()) {
if (Op == ExitingCond.AddRecValue) {
// Find cloned PHI for post-loop.
PHINode *PostLoopPN = cast<PHINode>(VMap[&PN]);
PostLoopPN->setIncomingValueForBlock(PostLoopPreHeader,
NewBoundValue);
}
}
}
}

// Replace SplitCandidateCond.BI's condition of pre-loop by True.
LLVMContext &Context = PreHeader->getContext();
SplitCandidateCond.BI->setCondition(ConstantInt::getTrue(Context));
Expand All @@ -411,15 +438,25 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
ExitingCond.BI->setSuccessor(1, PostLoopPreHeader);

// Update phi node in exit block of post-loop.
Builder.SetInsertPoint(&PostLoopPreHeader->front());
for (PHINode &PN : PostLoop->getExitBlock()->phis()) {
for (auto i : seq<int>(0, PN.getNumOperands())) {
// Check incoming block is pre-loop's exiting block.
if (PN.getIncomingBlock(i) == L.getExitingBlock()) {
Value *IncomingValue = PN.getIncomingValue(i);

// Create LCSSA phi node for incoming value.
PHINode *LCSSAPhi =
Builder.CreatePHI(PN.getType(), 1, PN.getName() + ".lcssa");
LCSSAPhi->setDebugLoc(PN.getDebugLoc());
LCSSAPhi->addIncoming(IncomingValue, PN.getIncomingBlock(i));

// Replace pre-loop's exiting block by post-loop's preheader.
PN.setIncomingBlock(i, PostLoopPreHeader);
// Replace incoming value by LCSSAPhi.
PN.setIncomingValue(i, LCSSAPhi);
// Add a new incoming value with post-loop's exiting block.
PN.addIncoming(VMap[PN.getIncomingValue(i)],
PostLoop->getExitingBlock());
PN.addIncoming(VMap[IncomingValue], PostLoop->getExitingBlock());
}
}
}
Expand All @@ -432,10 +469,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
SE.forgetLoop(&L);

// Canonicalize loops.
// TODO: Try to update LCSSA information according to above change.
formLCSSA(L, DT, &LI, &SE);
simplifyLoop(&L, &DT, &LI, &SE, nullptr, nullptr, true);
formLCSSA(*PostLoop, DT, &LI, &SE);
simplifyLoop(PostLoop, &DT, &LI, &SE, nullptr, nullptr, true);

// Add new post-loop to loop pass manager.
Expand Down
Expand Up @@ -24,14 +24,14 @@ define i16 @test_int() {
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i16 [[I]], 3
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY]], label [[ENTRY_SPLIT_SPLIT:%.*]]
; CHECK: entry.split.split:
; CHECK-NEXT: [[CALL_LCSSA1:%.*]] = phi i16 [ [[CALL]], [[COND_END]] ]
; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i16 [ [[I]], [[COND_END]] ]
; CHECK-NEXT: [[CALL_LCSSA_LCSSA:%.*]] = phi i16 [ [[CALL]], [[COND_END]] ]
; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i16 [ [[ADD]], [[COND_END]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i16 [[I_LCSSA]], 11
; CHECK-NEXT: br i1 [[TMP0]], label [[FOR_BODY_SPLIT_PREHEADER:%.*]], label [[END:%.*]]
; CHECK: for.body.split.preheader:
; CHECK-NEXT: br label [[FOR_BODY_SPLIT:%.*]]
; CHECK: for.body.split:
; CHECK-NEXT: [[I_SPLIT:%.*]] = phi i16 [ [[ADD_SPLIT:%.*]], [[COND_END_SPLIT:%.*]] ], [ 0, [[FOR_BODY_SPLIT_PREHEADER]] ]
; CHECK-NEXT: [[I_SPLIT:%.*]] = phi i16 [ [[ADD_SPLIT:%.*]], [[COND_END_SPLIT:%.*]] ], [ [[I_LCSSA]], [[FOR_BODY_SPLIT_PREHEADER]] ]
; CHECK-NEXT: [[CMP1_SPLIT:%.*]] = icmp ult i16 [[I_SPLIT]], 3
; CHECK-NEXT: br i1 false, label [[COND_TRUE_SPLIT:%.*]], label [[COND_FALSE_SPLIT:%.*]]
; CHECK: cond.false.split:
Expand All @@ -47,7 +47,7 @@ define i16 @test_int() {
; CHECK-NEXT: [[CALL_LCSSA_PH:%.*]] = phi i16 [ [[CALL_SPLIT]], [[COND_END_SPLIT]] ]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[CALL_LCSSA:%.*]] = phi i16 [ [[CALL_LCSSA1]], [[ENTRY_SPLIT_SPLIT]] ], [ [[CALL_LCSSA_PH]], [[END_LOOPEXIT]] ]
; CHECK-NEXT: [[CALL_LCSSA:%.*]] = phi i16 [ [[CALL_LCSSA_LCSSA]], [[ENTRY_SPLIT_SPLIT]] ], [ [[CALL_LCSSA_PH]], [[END_LOOPEXIT]] ]
; CHECK-NEXT: ret i16 [[CALL_LCSSA]]
;
entry:
Expand Down
105 changes: 105 additions & 0 deletions llvm/test/Transforms/LoopBoundSplit/bug51866.ll
@@ -0,0 +1,105 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-bound-split -S < %s | FileCheck %s

@B = external global [10 x i16], align 1

; Regression test for loop-bound-split on a loop whose only exiting block,
; for.cond, is also the loop header; the latch for.inc branches back to it
; unconditionally.
;
; Per the autogenerated assertions below, the expected output is:
;   * the original loop now exits when %i.0 reaches 5 and its in-body branch
;     is folded to true (only the @foo path runs);
;   * a cloned ".split" loop still exits at 10 with the in-body branch folded
;     to false (only the @bar path runs);
;   * every header phi of the cloned loop takes its start value from a
;     matching ".lcssa" phi created in entry.split.split, and the guard that
;     skips the cloned loop compares the ".lcssa" phi of %i.0 against 10.
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]]
; CHECK: entry.split:
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[I_0:%.*]] = phi i16 [ 0, [[ENTRY_SPLIT]] ], [ [[INC_0:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[I_1:%.*]] = phi i16 [ 10, [[ENTRY_SPLIT]] ], [ [[INC_0]], [[FOR_INC]] ]
; CHECK-NEXT: [[I_2:%.*]] = phi i16 [ 10, [[ENTRY_SPLIT]] ], [ [[INC_2:%.*]], [[FOR_INC]] ]
; CHECK-NEXT: [[I_3:%.*]] = phi i16 [ 15, [[ENTRY_SPLIT]] ], [ 30, [[FOR_INC]] ]
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i16 [[I_0]], 5
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[ENTRY_SPLIT_SPLIT:%.*]], label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i16 [[I_0]], 5
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @B, i16 0, i16 [[I_0]]
; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 1
; CHECK-NEXT: br i1 true, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK: if.then:
; CHECK-NEXT: call void @foo(i16 [[TMP0]], i16 [[I_3]])
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: if.else:
; CHECK-NEXT: call void @bar(i16 [[TMP0]], i16 [[I_3]])
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC_0]] = add nuw nsw i16 [[I_0]], 1
; CHECK-NEXT: [[INC_2]] = add nuw nsw i16 [[I_2]], 2
; CHECK-NEXT: br label [[FOR_COND]]
; CHECK: entry.split.split:
; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i16 [ [[I_0]], [[FOR_COND]] ]
; CHECK-NEXT: [[I_1_LCSSA:%.*]] = phi i16 [ [[I_1]], [[FOR_COND]] ]
; CHECK-NEXT: [[I_2_LCSSA:%.*]] = phi i16 [ [[I_2]], [[FOR_COND]] ]
; CHECK-NEXT: [[I_3_LCSSA:%.*]] = phi i16 [ [[I_3]], [[FOR_COND]] ]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i16 [[I_0_LCSSA]], 10
; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_COND_SPLIT_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.cond.split.preheader:
; CHECK-NEXT: br label [[FOR_COND_SPLIT:%.*]]
; CHECK: for.cond.split:
; CHECK-NEXT: [[I_0_SPLIT:%.*]] = phi i16 [ [[INC_0_SPLIT:%.*]], [[FOR_INC_SPLIT:%.*]] ], [ [[I_0_LCSSA]], [[FOR_COND_SPLIT_PREHEADER]] ]
; CHECK-NEXT: [[I_1_SPLIT:%.*]] = phi i16 [ [[INC_0_SPLIT]], [[FOR_INC_SPLIT]] ], [ [[I_1_LCSSA]], [[FOR_COND_SPLIT_PREHEADER]] ]
; CHECK-NEXT: [[I_2_SPLIT:%.*]] = phi i16 [ [[INC_2_SPLIT:%.*]], [[FOR_INC_SPLIT]] ], [ [[I_2_LCSSA]], [[FOR_COND_SPLIT_PREHEADER]] ]
; CHECK-NEXT: [[I_3_SPLIT:%.*]] = phi i16 [ 30, [[FOR_INC_SPLIT]] ], [ [[I_3_LCSSA]], [[FOR_COND_SPLIT_PREHEADER]] ]
; CHECK-NEXT: [[EXITCOND_NOT_SPLIT:%.*]] = icmp eq i16 [[I_0_SPLIT]], 10
; CHECK-NEXT: br i1 [[EXITCOND_NOT_SPLIT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_SPLIT:%.*]]
; CHECK: for.body.split:
; CHECK-NEXT: [[CMP1_SPLIT:%.*]] = icmp ult i16 [[I_0_SPLIT]], 5
; CHECK-NEXT: [[ARRAYIDX_SPLIT:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @B, i16 0, i16 [[I_0_SPLIT]]
; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX_SPLIT]], align 1
; CHECK-NEXT: br i1 false, label [[IF_THEN_SPLIT:%.*]], label [[IF_ELSE_SPLIT:%.*]]
; CHECK: if.else.split:
; CHECK-NEXT: call void @bar(i16 [[TMP2]], i16 [[I_3_SPLIT]])
; CHECK-NEXT: br label [[FOR_INC_SPLIT]]
; CHECK: if.then.split:
; CHECK-NEXT: call void @foo(i16 [[TMP2]], i16 [[I_3_SPLIT]])
; CHECK-NEXT: br label [[FOR_INC_SPLIT]]
; CHECK: for.inc.split:
; CHECK-NEXT: [[INC_0_SPLIT]] = add nuw nsw i16 [[I_0_SPLIT]], 1
; CHECK-NEXT: [[INC_2_SPLIT]] = add nuw nsw i16 [[I_2_SPLIT]], 2
; CHECK-NEXT: br label [[FOR_COND_SPLIT]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
br label %for.cond

; Loop header and sole exiting block: leaves the loop once %i.0 equals 10.
; %i.1 shares %i.0's back-edge value (%inc.0) but starts at 10, %i.2 is a
; separate counter stepping by 2, and %i.3 is 15 on entry and the constant 30
; on every later iteration.
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i16 [ 0, %entry ], [ %inc.0, %for.inc ]
%i.1 = phi i16 [ 10, %entry ], [ %inc.0, %for.inc ]
%i.2 = phi i16 [ 10, %entry ], [ %inc.2, %for.inc ]
%i.3 = phi i16 [ 15, %entry ], [ 30, %for.inc ]
%exitcond.not = icmp eq i16 %i.0, 10
br i1 %exitcond.not, label %for.end, label %for.body

; %cmp1 (unsigned %i.0 < 5) is the in-loop condition whose bound the pass is
; expected to split the iteration space on.
for.body: ; preds = %for.cond
%cmp1 = icmp ult i16 %i.0, 5
%arrayidx = getelementptr inbounds [10 x i16], [10 x i16]* @B, i16 0, i16 %i.0
%0 = load i16, i16* %arrayidx, align 1
br i1 %cmp1, label %if.then, label %if.else

if.then: ; preds = %for.body
call void @foo(i16 %0, i16 %i.3)
br label %for.inc

if.else: ; preds = %for.body
call void @bar(i16 %0, i16 %i.3)
br label %for.inc

; Loop latch: advances %i.0 by 1 and %i.2 by 2, then jumps back to the header.
for.inc: ; preds = %if.else, %if.then
%inc.0 = add nuw nsw i16 %i.0, 1
%inc.2 = add nuw nsw i16 %i.2, 2
br label %for.cond

for.end: ; preds = %for.cond
ret void
}

declare void @foo(i16, i16)
declare void @bar(i16, i16)

0 comments on commit 4c98070

Please sign in to comment.