diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 831911ab21c22..6f6d1f9c76068 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -6813,6 +6813,18 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond)) return std::nullopt; + // Inserting instructions in the preheader has a runtime cost, scale + // the allowed cost with the loops trip count as best we can. + const unsigned ExpansionBudget = [&]() { + unsigned Budget = 2 * SCEVCheapExpansionBudget; + if (unsigned SmallTC = SE.getSmallConstantMaxTripCount(L)) + return std::min(Budget, SmallTC); + if (std::optional SmallTC = getLoopEstimatedTripCount(L)) + return std::min(Budget, *SmallTC); + // Unknown trip count, assume long running by default. + return Budget; + }(); + const SCEV *BECount = SE.getBackedgeTakenCount(L); const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); SCEVExpander Expander(SE, DL, "lsr_fold_term_cond"); @@ -6862,8 +6874,7 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, continue; } - if (Expander.isHighCostExpansion(TermValueSLocal, L, - 2*SCEVCheapExpansionBudget, + if (Expander.isHighCostExpansion(TermValueSLocal, L, ExpansionBudget, &TTI, InsertPt)) { LLVM_DEBUG( dbgs() << "Is too expensive to expand terminating value for phi node" diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll index be24ecf112e30..7299a014b7983 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll @@ -482,20 +482,15 @@ define void @profiled_short_tc(ptr %a, i32 %offset, i32 %n) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1 ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84 -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[OFFSET_NONZERO]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 84 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], 1 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET_NONZERO]] -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] -; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !prof [[PROF0:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -637,16 +632,15 @@ define void @small_tc_trivial_loop(ptr %a, i32 %offset) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1 ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84 -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[OFFSET_NONZERO]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], 84 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], 1 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET_NONZERO]] -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] -; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 1 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -673,17 +667,15 @@ define void @small_tc_below_threshold(ptr %a, i32 %offset) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OFFSET_NONZERO:%.*]] = or i32 [[OFFSET:%.*]], 1 ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84 -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[OFFSET_NONZERO]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP1]], 84 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], 1 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 [[OFFSET_NONZERO]] -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] -; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 2 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ;