Skip to content

Commit

Permalink
[LSR] Rewrite IV match for term-fold using existing utilities
Browse files Browse the repository at this point in the history
Main benefit here is making the logic easier to follow, slightly more efficient, and more in line with LFTR.  This is not NFC.  There are three semantic changes here.

First, we drop handling for constants on the LHS of the comparison.  These are non-canonical, and we're very late in the optimization pipeline here, so there's no point in supporting this.  I removed a test which covered this case.

Second, we don't need the almost dead IV to be an addrec.  We just need SCEV to be able to compute a trip count for it.

Third, we require a simple IV for the almost dead IV.  In theory, this removes cases we could have previously handled, but given a) zero testing and b) multiple known correctness issues, I'm adopting an attitude of narrowing this down to something which works correctly, and *then* expanding.
  • Loading branch information
preames committed Mar 20, 2023
1 parent 72073fc commit b952148
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 83 deletions.
66 changes: 20 additions & 46 deletions llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6719,49 +6719,23 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
return std::nullopt;
}

// For `IsToFold`, a primary IV can be replaced by other affine AddRec when it
// is only used by the terminating condition. To check for this, we may need
// to traverse through a chain of use-def until we can examine the final
// usage.
// *----------------------*
// *---->| LoopHeader: |
// | | PrimaryIV = phi ... |
// | *----------------------*
// | |
// | |
// | chain of
// | single use
// used by |
// phi |
// | Value
// | / \
// | chain of chain of
// | single use single use
// | / \
// | / \
// *- Value Value --> used by terminating condition
auto IsToFold = [&](PHINode &PN) -> bool {
Value *V = &PN;

while (V->getNumUses() == 1)
V = *V->user_begin();

if (V->getNumUses() != 2)
return false;
BinaryOperator *LHS = dyn_cast<BinaryOperator>(TermCond->getOperand(0));
Value *RHS = TermCond->getOperand(1);
if (!LHS || !L->isLoopInvariant(RHS))
// We could pattern match the inverse form of the icmp, but that is
// non-canonical, and this pass is running *very* late in the pipeline.
return std::nullopt;

Value *VToPN = nullptr;
Value *VToTermCond = nullptr;
for (User *U : V->users()) {
while (U->getNumUses() == 1) {
if (isa<PHINode>(U))
VToPN = U;
if (U == TermCond)
VToTermCond = U;
U = *U->user_begin();
}
}
return VToPN && VToTermCond;
};
// Find the IV used by the current exit condition.
PHINode *ToFold;
Value *ToFoldStart, *ToFoldStep;
if (!matchSimpleRecurrence(LHS, ToFold, ToFoldStart, ToFoldStep))
return std::nullopt;

// If that IV isn't dead after we rewrite the exit condition in terms of
// another IV, there's no point in doing the transform.
if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond))
return std::nullopt;

// If this is an IV which we could replace the terminating condition, return
// the final value of the alternative IV on the last iteration.
Expand Down Expand Up @@ -6789,11 +6763,13 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
return TermValueS;
};

PHINode *ToFold = nullptr;
PHINode *ToHelpFold = nullptr;
const SCEV *TermValueS = nullptr;

for (PHINode &PN : L->getHeader()->phis()) {
if (ToFold == &PN)
continue;

if (!SE.isSCEVable(PN.getType())) {
LLVM_DEBUG(dbgs() << "IV of phi '" << PN
<< "' is not SCEV-able, not qualified for the "
Expand All @@ -6809,9 +6785,7 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
continue;
}

if (IsToFold(PN))
ToFold = &PN;
else if (auto P = getAlternateIVEnd(PN)) {
if (auto P = getAlternateIVEnd(PN)) {
ToHelpFold = &PN;
TermValueS = P;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,15 +106,14 @@ define void @NonAddRecIV(ptr %a) {
; CHECK-SAME: (ptr [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A]], i32 84
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 148
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 1, [[ENTRY]] ]
; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[LSR_IV_NEXT]] = mul nsw i32 [[LSR_IV]], 2
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 65536
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
Expand Down
33 changes: 0 additions & 33 deletions llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
Original file line number Diff line number Diff line change
Expand Up @@ -297,39 +297,6 @@ define void @IcmpSgt(ptr %a) {
; CHECK-LABEL: @IcmpSgt(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 88
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
%uglygep = getelementptr i8, ptr %a, i32 84
br label %for.body

for.body: ; preds = %for.body, %entry
%lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
%lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 379, %entry ]
store i32 1, ptr %lsr.iv1, align 4
%lsr.iv.next = add nsw i32 %lsr.iv, -1
%uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 4
%exitcond.not = icmp sgt i32 0, %lsr.iv.next
br i1 %exitcond.not, label %for.body, label %for.end

for.end: ; preds = %for.body
ret void
}

; Invert predicate and branches
define void @IcmpSgt2(ptr %a) {
; CHECK-LABEL: @IcmpSgt2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 1600
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
Expand Down

0 comments on commit b952148

Please sign in to comment.