-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LoopConstrainer] Apply loop guards to check that loop bounds are safe #71531
[LoopConstrainer] Apply loop guards to check that loop bounds are safe #71531
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Aleksandr Popov (aleks-tmb) Changes: Loop guards that apply to loop SCEV bounds allow IRCE for cases with compound loop bounds such as: if (K > 0 && M > 0) for (i = 0; i < min(K, M); i++) {...} if (K > 0 && M > 0) for (i = min(K, M); i >= 0; i--) {...} Otherwise SCEV couldn't prove that loops have safe bounds in these cases. Full diff: https://github.com/llvm/llvm-project/pull/71531.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/LoopConstrainer.cpp b/llvm/lib/Transforms/Utils/LoopConstrainer.cpp
index ea6d952cfa7d4f3..a2fb39ec64037f3 100644
--- a/llvm/lib/Transforms/Utils/LoopConstrainer.cpp
+++ b/llvm/lib/Transforms/Utils/LoopConstrainer.cpp
@@ -27,6 +27,9 @@ static bool isSafeDecreasingBound(const SCEV *Start, const SCEV *BoundSCEV,
if (!SE.isAvailableAtLoopEntry(BoundSCEV, L))
return false;
+ Start = SE.applyLoopGuards(Start, L);
+ BoundSCEV = SE.applyLoopGuards(BoundSCEV, L);
+
assert(SE.isKnownNegative(Step) && "expecting negative step");
LLVM_DEBUG(dbgs() << "isSafeDecreasingBound with:\n");
@@ -73,6 +76,9 @@ static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *BoundSCEV,
if (!SE.isAvailableAtLoopEntry(BoundSCEV, L))
return false;
+ Start = SE.applyLoopGuards(Start, L);
+ BoundSCEV = SE.applyLoopGuards(BoundSCEV, L);
+
LLVM_DEBUG(dbgs() << "isSafeIncreasingBound with:\n");
LLVM_DEBUG(dbgs() << "Start: " << *Start << "\n");
LLVM_DEBUG(dbgs() << "Step: " << *Step << "\n");
diff --git a/llvm/test/Transforms/IRCE/compound-loop-bound.ll b/llvm/test/Transforms/IRCE/compound-loop-bound.ll
index 0930d19e22154fc..e50d8c6127f4011 100644
--- a/llvm/test/Transforms/IRCE/compound-loop-bound.ll
+++ b/llvm/test/Transforms/IRCE/compound-loop-bound.ll
@@ -16,23 +16,56 @@ define void @incrementing_loop(ptr %arr, ptr %len_ptr, i32 %K, i32 %M) {
; CHECK-NEXT: br i1 [[AND]], label [[PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: preheader:
; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[K]], i32 [[M]])
+; CHECK-NEXT: [[SMIN1:%.*]] = call i32 @llvm.smin.i32(i32 [[LEN]], i32 [[M]])
+; CHECK-NEXT: [[SMIN2:%.*]] = call i32 @llvm.smin.i32(i32 [[SMIN1]], i32 [[K]])
+; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN2]], i32 0)
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK: loop.preheader:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ]
-; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1
+; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT: [[IDX_NEXT]] = add nsw i32 [[IDX]], 1
; CHECK-NEXT: [[GUARD:%.*]] = icmp slt i32 [[IDX]], [[LEN]]
-; CHECK-NEXT: br i1 [[GUARD]], label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS:%.*]]
+; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]]
; CHECK: in.bounds:
; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[IDX]]
; CHECK-NEXT: store i32 0, ptr [[ADDR]], align 4
; CHECK-NEXT: [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[SMIN]]
-; CHECK-NEXT: br i1 [[NEXT]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+; CHECK: main.exit.selector:
+; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[SMIN]]
+; CHECK-NEXT: br i1 [[TMP2]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK: main.pseudo.exit:
+; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT: br label [[POSTLOOP:%.*]]
+; CHECK: out.of.bounds.loopexit:
+; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]]
+; CHECK: out.of.bounds.loopexit3:
+; CHECK-NEXT: br label [[OUT_OF_BOUNDS]]
; CHECK: out.of.bounds:
; CHECK-NEXT: ret void
+; CHECK: exit.loopexit.loopexit:
+; CHECK-NEXT: br label [[EXIT_LOOPEXIT]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
+; CHECK: postloop:
+; CHECK-NEXT: br label [[LOOP_POSTLOOP:%.*]]
+; CHECK: loop.postloop:
+; CHECK-NEXT: [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_COPY]], [[POSTLOOP]] ], [ [[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_POSTLOOP:%.*]] ]
+; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1
+; CHECK-NEXT: [[GUARD_POSTLOOP:%.*]] = icmp slt i32 [[IDX_POSTLOOP]], [[LEN]]
+; CHECK-NEXT: br i1 [[GUARD_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]]
+; CHECK: in.bounds.postloop:
+; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[IDX_POSTLOOP]]
+; CHECK-NEXT: store i32 0, ptr [[ADDR_POSTLOOP]], align 4
+; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], [[SMIN]]
+; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP1:![0-9]+]], !loop_constrainer.loop.clone !6
;
entry:
%len = load i32, ptr %len_ptr, !range !0
@@ -78,24 +111,58 @@ define void @decrementing_loop(ptr %arr, ptr %len_ptr, i32 %K, i32 %M) {
; CHECK-NEXT: [[AND:%.*]] = and i1 [[CHECK0]], [[CHECK1]]
; CHECK-NEXT: br i1 [[AND]], label [[PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: preheader:
-; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[K]], i32 [[M]])
+; CHECK-NEXT: [[INDVAR_START:%.*]] = call i32 @llvm.smin.i32(i32 [[K]], i32 [[M]])
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDVAR_START]], 1
+; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[LEN]], i32 [[TMP0]])
+; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN]], i32 0)
+; CHECK-NEXT: [[EXIT_PRELOOP_AT:%.*]] = add nsw i32 [[SMAX]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[INDVAR_START]], [[EXIT_PRELOOP_AT]]
+; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]]
+; CHECK: loop.preloop.preheader:
+; CHECK-NEXT: br label [[LOOP_PRELOOP:%.*]]
+; CHECK: mainloop:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[SMIN]], [[PREHEADER]] ], [ [[IDX_DEC:%.*]], [[IN_BOUNDS:%.*]] ]
-; CHECK-NEXT: [[IDX_DEC]] = sub i32 [[IDX]], 1
+; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_PRELOOP_COPY:%.*]], [[MAINLOOP:%.*]] ], [ [[IDX_DEC:%.*]], [[IN_BOUNDS:%.*]] ]
+; CHECK-NEXT: [[IDX_DEC]] = sub nsw i32 [[IDX]], 1
; CHECK-NEXT: [[GUARD:%.*]] = icmp slt i32 [[IDX]], [[LEN]]
-; CHECK-NEXT: br i1 [[GUARD]], label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS:%.*]]
+; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT1:%.*]]
; CHECK: in.bounds:
; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[IDX]]
; CHECK-NEXT: store i32 0, ptr [[ADDR]], align 4
; CHECK-NEXT: [[NEXT:%.*]] = icmp sgt i32 [[IDX_DEC]], -1
-; CHECK-NEXT: br i1 [[NEXT]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK-NEXT: br i1 [[NEXT]], label [[LOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]]
+; CHECK: out.of.bounds.loopexit:
+; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]]
+; CHECK: out.of.bounds.loopexit1:
+; CHECK-NEXT: br label [[OUT_OF_BOUNDS]]
; CHECK: out.of.bounds:
; CHECK-NEXT: ret void
+; CHECK: exit.loopexit.loopexit:
+; CHECK-NEXT: br label [[EXIT_LOOPEXIT:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
+; CHECK: loop.preloop:
+; CHECK-NEXT: [[IDX_PRELOOP:%.*]] = phi i32 [ [[IDX_DEC_PRELOOP:%.*]], [[IN_BOUNDS_PRELOOP:%.*]] ], [ [[INDVAR_START]], [[LOOP_PRELOOP_PREHEADER]] ]
+; CHECK-NEXT: [[IDX_DEC_PRELOOP]] = sub i32 [[IDX_PRELOOP]], 1
+; CHECK-NEXT: [[GUARD_PRELOOP:%.*]] = icmp slt i32 [[IDX_PRELOOP]], [[LEN]]
+; CHECK-NEXT: br i1 [[GUARD_PRELOOP]], label [[IN_BOUNDS_PRELOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]]
+; CHECK: in.bounds.preloop:
+; CHECK-NEXT: [[ADDR_PRELOOP:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[IDX_PRELOOP]]
+; CHECK-NEXT: store i32 0, ptr [[ADDR_PRELOOP]], align 4
+; CHECK-NEXT: [[NEXT_PRELOOP:%.*]] = icmp sgt i32 [[IDX_DEC_PRELOOP]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[IDX_DEC_PRELOOP]], [[EXIT_PRELOOP_AT]]
+; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop [[LOOP7:![0-9]+]], !loop_constrainer.loop.clone !6
+; CHECK: preloop.exit.selector:
+; CHECK-NEXT: [[IDX_DEC_PRELOOP_LCSSA:%.*]] = phi i32 [ [[IDX_DEC_PRELOOP]], [[IN_BOUNDS_PRELOOP]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[IDX_DEC_PRELOOP_LCSSA]], -1
+; CHECK-NEXT: br i1 [[TMP3]], label [[PRELOOP_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT]]
+; CHECK: preloop.pseudo.exit:
+; CHECK-NEXT: [[IDX_PRELOOP_COPY]] = phi i32 [ [[INDVAR_START]], [[PREHEADER]] ], [ [[IDX_DEC_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ]
+; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ [[INDVAR_START]], [[PREHEADER]] ], [ [[IDX_DEC_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ]
+; CHECK-NEXT: br label [[MAINLOOP]]
;
entry:
%len = load i32, ptr %len_ptr, !range !0
diff --git a/llvm/test/Transforms/IRCE/variable-loop-bounds.ll b/llvm/test/Transforms/IRCE/variable-loop-bounds.ll
index 43d450b938afea4..d40b9818fb21099 100644
--- a/llvm/test/Transforms/IRCE/variable-loop-bounds.ll
+++ b/llvm/test/Transforms/IRCE/variable-loop-bounds.ll
@@ -950,31 +950,108 @@ define void @signed_var_imm_dec_eq(ptr nocapture %a, ptr nocapture readonly %b,
; CHECK-NEXT: [[CMP14:%.*]] = icmp slt i32 [[M]], 1024
; CHECK-NEXT: br i1 [[CMP14]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.preheader:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[M]], 1
+; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 1024)
+; CHECK-NEXT: [[EXIT_PRELOOP_AT:%.*]] = add nsw i32 [[SMAX]], -1
+; CHECK-NEXT: [[SMAX1:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 0)
+; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = add nsw i32 [[SMAX1]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 1024, [[EXIT_PRELOOP_AT]]
+; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY_PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]]
+; CHECK: for.body.preloop.preheader:
+; CHECK-NEXT: br label [[FOR_BODY_PRELOOP:%.*]]
+; CHECK: for.cond.cleanup.loopexit.loopexit:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
+; CHECK: mainloop:
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[INDVAR_END:%.*]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_BODY_PREHEADER3:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK: for.body.preheader3:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[DEC:%.*]], [[FOR_INC:%.*]] ], [ 1024, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[DEC:%.*]], [[FOR_INC:%.*]] ], [ [[IV_PRELOOP_COPY:%.*]], [[FOR_BODY_PREHEADER3]] ]
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[IV]], 1024
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV]]
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[IV]]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
-; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV]]
-; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_INC]], label [[IF_ELSE:%.*]]
+; CHECK-NEXT: br i1 true, label [[FOR_INC]], label [[IF_ELSE:%.*]]
; CHECK: if.else:
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[MUL]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[MUL]]
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ [[ADD]], [[IF_ELSE]] ], [ [[MUL]], [[FOR_BODY]] ]
; CHECK-NEXT: store i32 [[STOREMERGE]], ptr [[ARRAYIDX3]], align 4
; CHECK-NEXT: [[DEC]] = add nsw i32 [[IV]], -1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DEC]], [[M]]
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[DEC]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true
+; CHECK-NEXT: br i1 [[TMP7]], label [[MAIN_EXIT_SELECTOR:%.*]], label [[FOR_BODY]]
+; CHECK: main.exit.selector:
+; CHECK-NEXT: [[DEC_LCSSA:%.*]] = phi i32 [ [[DEC]], [[FOR_INC]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[DEC_LCSSA]], [[M]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[FOR_COND_CLEANUP_LOOPEXIT]]
+; CHECK: main.pseudo.exit:
+; CHECK-NEXT: [[IV_COPY:%.*]] = phi i32 [ [[IV_PRELOOP_COPY]], [[MAINLOOP:%.*]] ], [ [[DEC_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT: [[INDVAR_END2:%.*]] = phi i32 [ [[INDVAR_END]], [[MAINLOOP]] ], [ [[DEC_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT: br label [[POSTLOOP:%.*]]
+; CHECK: for.body.preloop:
+; CHECK-NEXT: [[IV_PRELOOP:%.*]] = phi i32 [ [[DEC_PRELOOP:%.*]], [[FOR_INC_PRELOOP:%.*]] ], [ 1024, [[FOR_BODY_PRELOOP_PREHEADER]] ]
+; CHECK-NEXT: [[CMP1_PRELOOP:%.*]] = icmp slt i32 [[IV_PRELOOP]], 1024
+; CHECK-NEXT: [[ARRAYIDX_PRELOOP:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV_PRELOOP]]
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_PRELOOP]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_PRELOOP:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[IV_PRELOOP]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX2_PRELOOP]], align 4
+; CHECK-NEXT: [[MUL_PRELOOP:%.*]] = mul nsw i32 [[TMP10]], [[TMP9]]
+; CHECK-NEXT: [[ARRAYIDX3_PRELOOP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV_PRELOOP]]
+; CHECK-NEXT: br i1 [[CMP1_PRELOOP]], label [[FOR_INC_PRELOOP]], label [[IF_ELSE_PRELOOP:%.*]]
+; CHECK: if.else.preloop:
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX3_PRELOOP]], align 4
+; CHECK-NEXT: [[ADD_PRELOOP:%.*]] = add nsw i32 [[TMP11]], [[MUL_PRELOOP]]
+; CHECK-NEXT: br label [[FOR_INC_PRELOOP]]
+; CHECK: for.inc.preloop:
+; CHECK-NEXT: [[STOREMERGE_PRELOOP:%.*]] = phi i32 [ [[ADD_PRELOOP]], [[IF_ELSE_PRELOOP]] ], [ [[MUL_PRELOOP]], [[FOR_BODY_PRELOOP]] ]
+; CHECK-NEXT: store i32 [[STOREMERGE_PRELOOP]], ptr [[ARRAYIDX3_PRELOOP]], align 4
+; CHECK-NEXT: [[DEC_PRELOOP]] = add nsw i32 [[IV_PRELOOP]], -1
+; CHECK-NEXT: [[CMP_PRELOOP:%.*]] = icmp eq i32 [[DEC_PRELOOP]], [[M]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[DEC_PRELOOP]], [[EXIT_PRELOOP_AT]]
+; CHECK-NEXT: [[TMP13:%.*]] = xor i1 [[TMP12]], true
+; CHECK-NEXT: br i1 [[TMP13]], label [[PRELOOP_EXIT_SELECTOR:%.*]], label [[FOR_BODY_PRELOOP]], !llvm.loop [[LOOP15:![0-9]+]], !loop_constrainer.loop.clone !5
+; CHECK: preloop.exit.selector:
+; CHECK-NEXT: [[DEC_PRELOOP_LCSSA:%.*]] = phi i32 [ [[DEC_PRELOOP]], [[FOR_INC_PRELOOP]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[DEC_PRELOOP_LCSSA]], [[M]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[PRELOOP_PSEUDO_EXIT]], label [[FOR_COND_CLEANUP_LOOPEXIT]]
+; CHECK: preloop.pseudo.exit:
+; CHECK-NEXT: [[IV_PRELOOP_COPY]] = phi i32 [ 1024, [[FOR_BODY_PREHEADER]] ], [ [[DEC_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ]
+; CHECK-NEXT: [[INDVAR_END]] = phi i32 [ 1024, [[FOR_BODY_PREHEADER]] ], [ [[DEC_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ]
+; CHECK-NEXT: br label [[MAINLOOP]]
+; CHECK: postloop:
+; CHECK-NEXT: br label [[FOR_BODY_POSTLOOP:%.*]]
+; CHECK: for.body.postloop:
+; CHECK-NEXT: [[IV_POSTLOOP:%.*]] = phi i32 [ [[DEC_POSTLOOP:%.*]], [[FOR_INC_POSTLOOP:%.*]] ], [ [[IV_COPY]], [[POSTLOOP]] ]
+; CHECK-NEXT: [[CMP1_POSTLOOP:%.*]] = icmp slt i32 [[IV_POSTLOOP]], 1024
+; CHECK-NEXT: [[ARRAYIDX_POSTLOOP:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV_POSTLOOP]]
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX_POSTLOOP]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_POSTLOOP:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[IV_POSTLOOP]]
+; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX2_POSTLOOP]], align 4
+; CHECK-NEXT: [[MUL_POSTLOOP:%.*]] = mul nsw i32 [[TMP16]], [[TMP15]]
+; CHECK-NEXT: [[ARRAYIDX3_POSTLOOP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV_POSTLOOP]]
+; CHECK-NEXT: br i1 [[CMP1_POSTLOOP]], label [[FOR_INC_POSTLOOP]], label [[IF_ELSE_POSTLOOP:%.*]]
+; CHECK: if.else.postloop:
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX3_POSTLOOP]], align 4
+; CHECK-NEXT: [[ADD_POSTLOOP:%.*]] = add nsw i32 [[TMP17]], [[MUL_POSTLOOP]]
+; CHECK-NEXT: br label [[FOR_INC_POSTLOOP]]
+; CHECK: for.inc.postloop:
+; CHECK-NEXT: [[STOREMERGE_POSTLOOP:%.*]] = phi i32 [ [[ADD_POSTLOOP]], [[IF_ELSE_POSTLOOP]] ], [ [[MUL_POSTLOOP]], [[FOR_BODY_POSTLOOP]] ]
+; CHECK-NEXT: store i32 [[STOREMERGE_POSTLOOP]], ptr [[ARRAYIDX3_POSTLOOP]], align 4
+; CHECK-NEXT: [[DEC_POSTLOOP]] = add nsw i32 [[IV_POSTLOOP]], -1
+; CHECK-NEXT: [[CMP_POSTLOOP:%.*]] = icmp eq i32 [[DEC_POSTLOOP]], [[M]]
+; CHECK-NEXT: br i1 [[CMP_POSTLOOP]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY_POSTLOOP]], !llvm.loop [[LOOP16:![0-9]+]], !loop_constrainer.loop.clone !5
;
entry:
%cmp14 = icmp slt i32 %M, 1024
|
Is the huge IR increase in tests intentional/desired? |
You should only use the applyLoopGuards() result to perform checks -- for expansion, you should use the original SCEV. |
b1ef93b
to
c734c94
Compare
Yes, it's a result of IRCE being applied. |
Thanks, fixed |
7fc25cf
to
0efb39c
Compare
Loop guards that apply to loop SCEV bounds allow IRCE for cases with compound loop bounds such as: if (K > 0 && M > 0) for (i = 0; i < min(K, M); i++) {...} if (K > 0 && M > 0) for (i = min(K, M); i >= 0; i--) {...} Otherwise SCEV couldn't prove that loops have safe bounds in these cases.
0efb39c
to
2e3f05b
Compare
@nikic Hi, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Loop guards that apply to loop SCEV bounds allow IRCE for cases with compound loop bounds such as:
if (K > 0 && M > 0)
for (i = 0; i < min(K, M); i++) {...}
if (K > 0 && M > 0)
for (i = min(K, M); i >= 0; i--) {...}
Otherwise SCEV couldn't prove that loops have safe bounds in these cases.