diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 6e1aabae06d36..c834e51b5f292 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1403,8 +1403,17 @@ createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB, } if (SkipLastIter) { - const SCEV *One = SE->getOne(MaxIter->getType()); - MaxIter = SE->getMinusSCEV(MaxIter, One); + // Skipping the last iteration semantically means "subtract 1, ignoring + // unsigned wrap". getLoopInvariantExitCondDuringFirstIterations handles + // umin smartly, but umin(a, b) - 1 is unlikely to simplify, so we manually + // construct umin(a - 1, b - 1) from its operands instead. + SmallVector Elements; + if (auto *UMin = dyn_cast(MaxIter)) { + for (auto *Op : UMin->operands()) + Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType()))); + MaxIter = SE->getUMinFromMismatchedTypes(Elements); + } else + MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType())); } // Check if there is a loop-invariant predicate equivalent to our check. @@ -1724,6 +1733,19 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { bool Changed = false; bool SkipLastIter = false; + const SCEV *CurrMaxExit = SE->getCouldNotCompute(); + auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) { + if (SkipLastIter || isa(MaxExitCount)) + return; + if (isa(CurrMaxExit)) + CurrMaxExit = MaxExitCount; + else + CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount); + // Once the umin of the exit counts passed in so far equals MaxBECount, all + // later checks run one iteration less (if the loop runs more than once). 
+ if (CurrMaxExit == MaxBECount) + SkipLastIter = true; + }; SmallSet DominatingExactExitCounts; for (BasicBlock *ExitingBB : ExitingBlocks) { const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB); @@ -1759,17 +1781,11 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { Changed = true; else if (SkipLastIter && OptimizeCond(true)) Changed = true; - if (MaxBECount == MaxExitCount) - // If the loop has more than 1 iteration, all further checks will be - // executed 1 iteration less. - SkipLastIter = true; + UpdateSkipLastIter(MaxExitCount); continue; } - if (MaxBECount == MaxExitCount) - // If the loop has more than 1 iteration, all further checks will be - // executed 1 iteration less. - SkipLastIter = true; + UpdateSkipLastIter(ExactExitCount); // If we know we'd exit on the first iteration, rewrite the exit to // reflect this. This does not imply the loop must exit through this diff --git a/llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll b/llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll index 809468f8d4e91..bd5b17ace2e9c 100644 --- a/llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll +++ b/llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll @@ -842,13 +842,14 @@ failed: ret i32 -3 } -; TODO: Same as test_litter_conditions, but an extra check with known exact exit count is preventing the opt. +; Same as test_litter_conditions, but with an extra check that has a known exact exit count. 
define i32 @test_litter_conditions_constant(i32 %start, i32 %len) { ; CHECK-LABEL: @test_litter_conditions_constant( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[CANONICAL_IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[CANONICAL_IV_NEXT:%.*]], [[BACKEDGE]] ] ; CHECK-NEXT: [[CONSTANT_CHECK:%.*]] = icmp ult i32 [[CANONICAL_IV]], 65635 ; CHECK-NEXT: br i1 [[CONSTANT_CHECK]], label [[CONSTANT_CHECK_PASSED:%.*]], label [[CONSTANT_CHECK_FAILED:%.*]] @@ -858,10 +859,9 @@ define i32 @test_litter_conditions_constant(i32 %start, i32 %len) { ; CHECK-NEXT: [[AND_1:%.*]] = and i1 [[ZERO_CHECK]], [[FAKE_1]] ; CHECK-NEXT: br i1 [[AND_1]], label [[RANGE_CHECK_BLOCK:%.*]], label [[FAILED_1:%.*]] ; CHECK: range_check_block: -; CHECK-NEXT: [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1 -; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_MINUS_1]], [[LEN:%.*]] +; CHECK-NEXT: [[RANGE_CHECK_FIRST_ITER:%.*]] = icmp ult i32 [[TMP0]], [[LEN:%.*]] ; CHECK-NEXT: [[FAKE_2:%.*]] = call i1 @cond() -; CHECK-NEXT: [[AND_2:%.*]] = and i1 [[RANGE_CHECK]], [[FAKE_2]] +; CHECK-NEXT: [[AND_2:%.*]] = and i1 [[RANGE_CHECK_FIRST_ITER]], [[FAKE_2]] ; CHECK-NEXT: br i1 [[AND_2]], label [[BACKEDGE]], label [[FAILED_2:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1