Skip to content

Commit

Permalink
[LSR][term-fold] Adjust expansion budget based on trip count (#80304)
Browse files Browse the repository at this point in the history
Follow up to #74747

This change extends the previously added fixed expansion threshold by
scaling down the cost allowed for an expansion for a loop with either a
small known trip count or a profile which indicates the trip count is
likely small. The goal here is to improve code generation for a loop
nest where the outer loop has a high trip count, and the inner loop runs
only a handful of iterations.

---------

Co-authored-by: Nikita Popov <github@npopov.com>
  • Loading branch information
preames and nikic committed Feb 2, 2024
1 parent b629414 commit 28865da
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 22 deletions.
15 changes: 13 additions & 2 deletions llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6813,6 +6813,18 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond))
return std::nullopt;

// Inserting instructions in the preheader has a runtime cost, scale
// the allowed cost with the loops trip count as best we can.
const unsigned ExpansionBudget = [&]() {
unsigned Budget = 2 * SCEVCheapExpansionBudget;
if (unsigned SmallTC = SE.getSmallConstantMaxTripCount(L))
return std::min(Budget, SmallTC);
if (std::optional<unsigned> SmallTC = getLoopEstimatedTripCount(L))
return std::min(Budget, *SmallTC);
// Unknown trip count, assume long running by default.
return Budget;
}();

const SCEV *BECount = SE.getBackedgeTakenCount(L);
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
Expand Down Expand Up @@ -6862,8 +6874,7 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
continue;
}

if (Expander.isHighCostExpansion(TermValueSLocal, L,
2*SCEVCheapExpansionBudget,
if (Expander.isHighCostExpansion(TermValueSLocal, L, ExpansionBudget,
&TTI, InsertPt)) {
LLVM_DEBUG(
dbgs() << "Is too expensive to expand terminating value for phi node"
Expand Down
32 changes: 12 additions & 20 deletions llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
Original file line number Diff line number Diff line change
Expand Up @@ -482,20 +482,15 @@ define void @profiled_short_tc(ptr %a, i32 %offset, i32 %n) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[OFFSET_NONZERO]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 84
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], 1
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET_NONZERO]]
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !prof [[PROF0:![0-9]+]]
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], [[N:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !prof [[PROF0:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
Expand Down Expand Up @@ -637,16 +632,15 @@ define void @small_tc_trivial_loop(ptr %a, i32 %offset) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[OFFSET_NONZERO]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], 84
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], 1
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET_NONZERO]]
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 1
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
Expand All @@ -673,17 +667,15 @@ define void @small_tc_below_threshold(ptr %a, i32 %offset) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[OFFSET_NONZERO]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP1]], 84
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], 1
; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET_NONZERO]]
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 2
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
Expand Down

0 comments on commit 28865da

Please sign in to comment.