Skip to content

Commit

Permalink
[IRCE] Support inverted range check's predicate
Browse files Browse the repository at this point in the history
IRCE expects true edge of range check's branch comes to loop.
If it meets reverse case - invert the branch.

Reviewed By: skatkov

Differential Revision: https://reviews.llvm.org/D148244
  • Loading branch information
aleks-tmb committed Jul 3, 2023
1 parent 7905c48 commit 64e289c
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 23 deletions.
41 changes: 28 additions & 13 deletions llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
Expand Down Expand Up @@ -221,10 +222,9 @@ class InductiveRangeCheck {
///
/// NB! There may be conditions feeding into \p BI that aren't inductive range
/// checks, and hence don't end up in \p Checks.
static void
extractRangeChecksFromBranch(BranchInst *BI, Loop *L, ScalarEvolution &SE,
BranchProbabilityInfo *BPI,
SmallVectorImpl<InductiveRangeCheck> &Checks);
static void extractRangeChecksFromBranch(
BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed);
};

struct LoopStructure;
Expand Down Expand Up @@ -388,16 +388,29 @@ void InductiveRangeCheck::extractRangeChecksFromCond(

void InductiveRangeCheck::extractRangeChecksFromBranch(
BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
SmallVectorImpl<InductiveRangeCheck> &Checks) {
SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed) {
if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
return;

unsigned IndexLoopSucc = L->contains(BI->getSuccessor(0)) ? 0 : 1;
assert(L->contains(BI->getSuccessor(IndexLoopSucc)) &&
"No edges coming to loop?");
BranchProbability LikelyTaken(15, 16);

if (!SkipProfitabilityChecks && BPI &&
BPI->getEdgeProbability(BI->getParent(), (unsigned)0) < LikelyTaken)
BPI->getEdgeProbability(BI->getParent(), IndexLoopSucc) < LikelyTaken)
return;

// IRCE expects branch's true edge comes to loop. Invert branch for opposite
// case.
if (IndexLoopSucc != 0) {
IRBuilder<> Builder(BI);
InvertBranch(BI, Builder);
if (BPI)
BPI->swapSuccEdgesProbabilities(BI->getParent());
Changed = true;
}

SmallPtrSet<Value *, 8> Visited;
InductiveRangeCheck::extractRangeChecksFromCond(L, SE, BI->getOperandUse(0),
Checks, Visited);
Expand Down Expand Up @@ -1863,14 +1876,15 @@ bool InductiveRangeCheckElimination::run(

LLVMContext &Context = Preheader->getContext();
SmallVector<InductiveRangeCheck, 16> RangeChecks;
bool Changed = false;

for (auto *BBI : L->getBlocks())
if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI,
RangeChecks);
RangeChecks, Changed);

if (RangeChecks.empty())
return false;
return Changed;

auto PrintRecognizedRangeChecks = [&](raw_ostream &OS) {
OS << "irce: looking at loop "; L->print(OS);
Expand All @@ -1891,11 +1905,11 @@ bool InductiveRangeCheckElimination::run(
if (!MaybeLoopStructure) {
LLVM_DEBUG(dbgs() << "irce: could not parse loop structure: "
<< FailureReason << "\n";);
return false;
return Changed;
}
LoopStructure LS = *MaybeLoopStructure;
if (!isProfitableToTransform(*L, LS))
return false;
return Changed;
const SCEVAddRecExpr *IndVar =
cast<SCEVAddRecExpr>(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase), SE.getSCEV(LS.IndVarStep)));

Expand Down Expand Up @@ -1924,12 +1938,13 @@ bool InductiveRangeCheckElimination::run(
}

if (!SafeIterRange)
return false;
return Changed;

LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT, *SafeIterRange);
bool Changed = LC.run();

if (Changed) {
if (LC.run()) {
Changed = true;

auto PrintConstrainedLoopInfo = [L]() {
dbgs() << "irce: in function ";
dbgs() << L->getHeader()->getParent()->getName() << ": ";
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/IRCE/non-loop-invariant-rhs-instr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ define i32 @test_01(i32 %A, i64 %Len, ptr %array) {
; CHECK: guarded:
; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[INDVAR]]
; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[ADDR]], align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[ZERO_LOOPEXIT_LOOPEXIT4:%.*]], label [[LATCH]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[RES]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[LATCH]], label [[ZERO_LOOPEXIT_LOOPEXIT4:%.*]]
; CHECK: latch:
; CHECK-NEXT: [[INDVAR_NEXT]] = add nuw nsw i64 [[INDVAR]], 2
; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], 3
Expand Down Expand Up @@ -73,8 +73,8 @@ define i32 @test_01(i32 %A, i64 %Len, ptr %array) {
; CHECK: guarded.postloop:
; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDVAR_POSTLOOP]]
; CHECK-NEXT: [[RES_POSTLOOP:%.*]] = load i32, ptr [[ADDR_POSTLOOP]], align 4
; CHECK-NEXT: [[CMP_POSTLOOP:%.*]] = icmp eq i32 [[RES_POSTLOOP]], 0
; CHECK-NEXT: br i1 [[CMP_POSTLOOP]], label [[ZERO_LOOPEXIT_LOOPEXIT:%.*]], label [[LATCH_POSTLOOP]]
; CHECK-NEXT: [[CMP_POSTLOOP:%.*]] = icmp ne i32 [[RES_POSTLOOP]], 0
; CHECK-NEXT: br i1 [[CMP_POSTLOOP]], label [[LATCH_POSTLOOP]], label [[ZERO_LOOPEXIT_LOOPEXIT:%.*]]
; CHECK: latch.postloop:
; CHECK-NEXT: [[INDVAR_NEXT_POSTLOOP]] = add nuw nsw i64 [[INDVAR_POSTLOOP]], 2
; CHECK-NEXT: [[RES2_POSTLOOP]] = mul i32 [[RES_POSTLOOP]], 3
Expand Down
55 changes: 49 additions & 6 deletions llvm/test/Transforms/IRCE/stride_more_than_1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -702,31 +702,74 @@ out_of_bounds:
}

; Same as test_09 but range check comparison is inversed.
; TODO: IRCE is allowed.
; IRCE is allowed.
define i32 @test_10(ptr %p, ptr %capacity_p, ptr %num_elements_p) {
; CHECK-LABEL: define i32 @test_10
; CHECK-SAME: (ptr [[P:%.*]], ptr [[CAPACITY_P:%.*]], ptr [[NUM_ELEMENTS_P:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPACITY:%.*]] = load i32, ptr [[CAPACITY_P]], align 4, !range [[RNG16]]
; CHECK-NEXT: [[NUM_ELEMENTS:%.*]] = load i32, ptr [[NUM_ELEMENTS_P]], align 4, !range [[RNG16]]
; CHECK-NEXT: [[LIMIT:%.*]] = sub i32 [[CAPACITY]], 3
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[CAPACITY]], -3
; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[CAPACITY]], 2147483646
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 0)
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[SMAX]]
; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[LIMIT]], i32 0)
; CHECK-NEXT: [[SMAX2:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN]], i32 -1)
; CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[SMAX2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[SMIN3:%.*]] = call i32 @llvm.smin.i32(i32 [[NUM_ELEMENTS]], i32 [[TMP4]])
; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN3]], i32 0)
; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
; CHECK-NEXT: br i1 [[TMP5]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
; CHECK: loop.preheader:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
; CHECK-NEXT: [[CAPACITY_CHECK:%.*]] = icmp sge i32 [[IV]], [[LIMIT]]
; CHECK-NEXT: br i1 [[CAPACITY_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]], !prof [[PROF19:![0-9]+]]
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[CAPACITY_CHECK:%.*]] = icmp slt i32 [[IV]], [[LIMIT]]
; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]], !prof [[PROF17]]
; CHECK: backedge:
; CHECK-NEXT: [[IV_WIDE:%.*]] = zext i32 [[IV]] to i64
; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i64 [[IV_WIDE]]
; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[NUM_ELEMENTS]]
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[IV_NEXT]], [[EXIT_MAINLOOP_AT]]
; CHECK-NEXT: br i1 [[TMP6]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
; CHECK: main.exit.selector:
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ]
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], [[BACKEDGE]] ]
; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[IV_NEXT_LCSSA]], [[NUM_ELEMENTS]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT:%.*]]
; CHECK: main.pseudo.exit:
; CHECK-NEXT: [[IV_COPY:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
; CHECK-NEXT: br label [[POSTLOOP:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: [[IV_LCSSA1_PH:%.*]] = phi i32 [ [[IV_POSTLOOP:%.*]], [[BACKEDGE_POSTLOOP:%.*]] ]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: [[IV_LCSSA1:%.*]] = phi i32 [ [[IV]], [[BACKEDGE]] ]
; CHECK-NEXT: [[IV_LCSSA1:%.*]] = phi i32 [ [[IV_LCSSA]], [[MAIN_EXIT_SELECTOR]] ], [ [[IV_LCSSA1_PH]], [[EXIT_LOOPEXIT:%.*]] ]
; CHECK-NEXT: ret i32 [[IV_LCSSA1]]
; CHECK: out_of_bounds.loopexit:
; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]]
; CHECK: out_of_bounds.loopexit5:
; CHECK-NEXT: br label [[OUT_OF_BOUNDS]]
; CHECK: out_of_bounds:
; CHECK-NEXT: ret i32 -1
; CHECK: postloop:
; CHECK-NEXT: br label [[LOOP_POSTLOOP:%.*]]
; CHECK: loop.postloop:
; CHECK-NEXT: [[IV_POSTLOOP]] = phi i32 [ [[IV_COPY]], [[POSTLOOP]] ], [ [[IV_NEXT_POSTLOOP:%.*]], [[BACKEDGE_POSTLOOP]] ]
; CHECK-NEXT: [[CAPACITY_CHECK_POSTLOOP:%.*]] = icmp slt i32 [[IV_POSTLOOP]], [[LIMIT]]
; CHECK-NEXT: br i1 [[CAPACITY_CHECK_POSTLOOP]], label [[BACKEDGE_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]], !prof [[PROF17]]
; CHECK: backedge.postloop:
; CHECK-NEXT: [[IV_WIDE_POSTLOOP:%.*]] = zext i32 [[IV_POSTLOOP]] to i64
; CHECK-NEXT: [[EL_PTR_POSTLOOP:%.*]] = getelementptr i32, ptr [[P]], i64 [[IV_WIDE_POSTLOOP]]
; CHECK-NEXT: store i32 1, ptr [[EL_PTR_POSTLOOP]], align 4
; CHECK-NEXT: [[IV_NEXT_POSTLOOP]] = add nuw nsw i32 [[IV_POSTLOOP]], 4
; CHECK-NEXT: [[LOOP_COND_POSTLOOP:%.*]] = icmp slt i32 [[IV_NEXT_POSTLOOP]], [[NUM_ELEMENTS]]
; CHECK-NEXT: br i1 [[LOOP_COND_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP19:![0-9]+]], !irce.loop.clone !6
;
entry:
%capacity = load i32, ptr %capacity_p, !range !4
Expand Down

0 comments on commit 64e289c

Please sign in to comment.