Skip to content

Commit

Permalink
[LSR] Fold terminating condition not only for eq and ne.
Browse files Browse the repository at this point in the history
Add opportunity to fold any icmp instruction.
  • Loading branch information
mga-sc committed Mar 20, 2023
1 parent f721fcb commit e4dd7ec
Show file tree
Hide file tree
Showing 3 changed files with 250 additions and 36 deletions.
15 changes: 9 additions & 6 deletions llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6707,9 +6707,9 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
if (BI->isUnconditional())
return std::nullopt;
auto *TermCond = dyn_cast<ICmpInst>(BI->getCondition());
if (!TermCond || !TermCond->isEquality()) {
LLVM_DEBUG(dbgs() << "Cannot fold on branching condition that is not an "
"ICmpInst::eq / ICmpInst::ne\n");
if (!TermCond) {
LLVM_DEBUG(
dbgs() << "Cannot fold on branching condition that is not an ICmpInst");
return std::nullopt;
}
if (!TermCond->hasOneUse()) {
Expand Down Expand Up @@ -6934,9 +6934,12 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
IRBuilder<> LatchBuilder(LoopLatch->getTerminator());
// FIXME: We are adding a use of an IV here without account for poison safety.
// This is incorrect.
Value *NewTermCond = LatchBuilder.CreateICmp(
OldTermCond->getPredicate(), LoopValue, TermValue,
"lsr_fold_term_cond.replaced_term_cond");
Value *NewTermCond =
LatchBuilder.CreateICmp(CmpInst::ICMP_EQ, LoopValue, TermValue,
"lsr_fold_term_cond.replaced_term_cond");
// Swap successors to exit loop body if IV equals to new TermValue
if (BI->getSuccessor(0) == L->getHeader())
BI->swapSuccessors();

LLVM_DEBUG(dbgs() << "Old term-cond:\n"
<< *OldTermCond << "\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,15 +144,15 @@ define void @NonSCEVableIV(float %init, float* %A, i32 %N) {
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr @fp_inc, align 4
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 1, [[ENTRY]] ]
; CHECK-NEXT: [[X_05:%.*]] = phi float [ [[INIT]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: store float [[X_05]], ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[ADD]] = fsub float [[X_05]], [[TMP0]]
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[LSR_IV]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: [[UGLYGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
Expand All @@ -176,20 +176,21 @@ for.end: ; preds = %for.end
ret void
}

define void @NonIcmpEqNe(ptr %a) {
; CHECK-LABEL: define void @NonIcmpEqNe
define void @NonIcmp(ptr %a) {
; CHECK-LABEL: define void @NonIcmp
; CHECK-SAME: (ptr [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A]], i64 84
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 379, [[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], [[FOR_BODY]] ], [ 378, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY]] ]
; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp sle i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp sle i64 [[LSR_IV2]], 0
; CHECK-NEXT: [[FIND_COND:%.*]] = and i1 [[EXITCOND_NOT]], true
; CHECK-NEXT: [[LSR_IV_NEXT3]] = add nsw i64 [[LSR_IV2]], -1
; CHECK-NEXT: br i1 [[FIND_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
Expand All @@ -204,7 +205,8 @@ for.body: ; preds = %for.body, %entry
%lsr.iv.next = add nsw i64 %lsr.iv, -1
%uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4
%exitcond.not = icmp sle i64 %lsr.iv.next, 0
br i1 %exitcond.not, label %for.end, label %for.body
%find.cond = and i1 %exitcond.not, 1
br i1 %find.cond, label %for.end, label %for.body

for.end: ; preds = %for.body
ret void
Expand All @@ -215,7 +217,7 @@ define void @TermCondMoreThanOneUse(ptr %a) {
; CHECK-SAME: (ptr [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A]], i64 84
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 1600
; CHECK-NEXT: [[UGLYGEP6:%.*]] = getelementptr i8, ptr [[A]], i64 1600
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], [[FOR_BODY]] ], [ -378, [[ENTRY:%.*]] ]
Expand All @@ -225,7 +227,7 @@ define void @TermCondMoreThanOneUse(ptr %a) {
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[LSR_IV2]], 0
; CHECK-NEXT: [[DUMMY:%.*]] = select i1 [[EXITCOND_NOT]], i8 0, i8 1
; CHECK-NEXT: [[LSR_IV_NEXT3]] = add nsw i64 [[LSR_IV2]], 1
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP6]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
Expand Down Expand Up @@ -278,16 +280,16 @@ define void @ebur128_calc_gating_block(ptr %st, ptr %optional_output) {
; CHECK-NEXT: br i1 [[CMP525_NOT]], label [[FOR_INC11]], label [[FOR_BODY7_LR_PH:%.*]]
; CHECK: for.body7.lr.ph:
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[AUDIO_DATA]], align 8
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[LSR_IV1]]
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[LSR_IV1]]
; CHECK-NEXT: br label [[FOR_BODY7:%.*]]
; CHECK: for.body7:
; CHECK-NEXT: [[LSR_IV3:%.*]] = phi ptr [ [[SCEVGEP4:%.*]], [[FOR_BODY7]] ], [ [[SCEVGEP]], [[FOR_BODY7_LR_PH]] ]
; CHECK-NEXT: [[LSR_IV3:%.*]] = phi ptr [ [[UGLYGEP4:%.*]], [[FOR_BODY7]] ], [ [[UGLYGEP]], [[FOR_BODY7_LR_PH]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY7]] ], [ [[UMAX]], [[FOR_BODY7_LR_PH]] ]
; CHECK-NEXT: [[CHANNEL_SUM_127:%.*]] = phi double [ [[CHANNEL_SUM_030]], [[FOR_BODY7_LR_PH]] ], [ [[ADD10:%.*]], [[FOR_BODY7]] ]
; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[LSR_IV3]], align 8
; CHECK-NEXT: [[ADD10]] = fadd double [[CHANNEL_SUM_127]], [[TMP4]]
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP4]] = getelementptr i8, ptr [[LSR_IV3]], i64 [[TMP2]]
; CHECK-NEXT: [[UGLYGEP4]] = getelementptr i8, ptr [[LSR_IV3]], i64 [[TMP2]]
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_INC11_LOOPEXIT:%.*]], label [[FOR_BODY7]]
; CHECK: for.inc11.loopexit:
Expand Down Expand Up @@ -367,14 +369,14 @@ define i64 @alac_seek(ptr %0) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DIV:%.*]] = udiv i64 1, 0
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[DIV]], 1
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 12
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 12
; CHECK-NEXT: br label [[FOR_BODY_I:%.*]]
; CHECK: for.body.i:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], [[FOR_BODY_I]] ], [ [[SCEVGEP]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY_I]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY_I]] ], [ [[TMP1]], [[ENTRY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label [[ALAC_PAKT_BLOCK_OFFSET_EXIT:%.*]], label [[FOR_BODY_I]]
; CHECK: alac_pakt_block_offset.exit:
Expand Down

0 comments on commit e4dd7ec

Please sign in to comment.