diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp index 1962c8ba39fb3..1a42f6b23443e 100644 --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -1074,6 +1074,35 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { if (isa(LatchEC)) return false; // profitability - want hot exit in analyzeable set + // At this point, we have found an analyzeable latch, and a widenable + // condition above the loop. If we have a widenable exit within the loop + // (for which we can't compute exit counts), drop the ability to further + // widen so that we gain ability to analyze its exit count and perform this + // transform. TODO: It'd be nice to know for sure the exit became + // analyzeable after dropping widenability. + { + bool Invalidate = false; + + for (auto *ExitingBB : ExitingBlocks) { + if (LI->getLoopFor(ExitingBB) != L) + continue; + + auto *BI = dyn_cast(ExitingBB->getTerminator()); + if (!BI) + continue; + + Use *Cond, *WC; + BasicBlock *IfTrueBB, *IfFalseBB; + if (parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB) && + L->contains(IfTrueBB)) { + WC->set(ConstantInt::getTrue(IfTrueBB->getContext())); + Invalidate = true; + } + } + if (Invalidate) + SE->forgetLoop(L); + } + // The use of umin(all analyzeable exits) instead of latch is subtle, but // important for profitability. We may have a loop which hasn't been fully // canonicalized just yet. If the exit we chose to widen is provably never diff --git a/llvm/test/Transforms/LoopPredication/predicate-exits.ll b/llvm/test/Transforms/LoopPredication/predicate-exits.ll index 0d6eba9015a45..83d85eb74a626 100644 --- a/llvm/test/Transforms/LoopPredication/predicate-exits.ll +++ b/llvm/test/Transforms/LoopPredication/predicate-exits.ll @@ -989,6 +989,117 @@ guarded: } +; If we have a stray widenable branch in the loop, we should still be able to +; run. 
This can happen when unswitching's cost model avoids unswitching some +; branches. The CHECK lines expect the in-loop use of %wc2 to become true. +define i32 @wb_in_loop(i32* %array, i32 %length, i32 %n, i1 %cond_0) { +; CHECK-LABEL: @wb_in_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[WC2:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[COND_0:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = and i1 [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP8]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED2:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED2]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]] +; CHECK-NEXT: br i1 true, label [[GUARDED:%.*]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) 
@llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET2]] +; CHECK: guarded: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[WITHIN_BOUNDS2:%.*]] = icmp ult i32 [[I]], [[LENGTH]] +; CHECK-NEXT: [[WB_COND:%.*]] = and i1 [[WITHIN_BOUNDS2]], true +; CHECK-NEXT: br i1 true, label [[GUARDED2]], label [[DEOPT3:%.*]], !prof !0 +; CHECK: deopt3: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[DEOPTRET3:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET3]] +; CHECK: guarded2: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED2]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %wc2 = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond + br i1 %exiplicit_guard_cond, label %loop.preheader, label %deopt, !prof !0 + +deopt: + %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret + +loop.preheader: + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded2 ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded2 ], [ 0, %loop.preheader ] + call void @unknown() + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt2, !prof !0 + +deopt2: + call void @unknown() + %deoptret2 = call i32 (...) 
@llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret2 + +guarded: + call void @unknown() + %within.bounds2 = icmp ult i32 %i, %length + %wb_cond = and i1 %within.bounds2, %wc2 + br i1 %wb_cond, label %guarded2, label %deopt3, !prof !0 + +deopt3: + call void @unknown() + %deoptret3 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret3 + +guarded2: + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + store i32 0, i32* %array.i.ptr + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded2 ] + ret i32 %result +} + + + declare void @unknown() declare i1 @llvm.experimental.widenable.condition()