-
Notifications
You must be signed in to change notification settings - Fork 12k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SCEV] Teach SCEVExpander to use zext nneg when possible #70815
Conversation
zext nneg was recently added to the IR in llvm#67982. Teaching SCEVExpander to emit nneg when possible is valuable since SCEV may have proved non-trivial facts about loop bounds which would otherwise be lost when materializing the value.
@llvm/pr-subscribers-llvm-transforms Author: Philip Reames (preames) Changeszext nneg was recently added to the IR in #67982. Teaching SCEVExpander to emit nneg when possible is valuable since SCEV may have proved non-trivial facts about loop bounds which would otherwise be lost when materializing the value. Full diff: https://github.com/llvm/llvm-project/pull/70815.diff 7 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 5c976d574854283..c8cd9c4ee13e65c 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1293,7 +1293,10 @@ Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
Value *V = expand(S->getOperand());
- return Builder.CreateZExt(V, S->getType());
+ auto *Res = Builder.CreateZExt(V, S->getType());
+ if (auto *I = dyn_cast<Instruction>(Res))
+ I->setNonNeg(SE.isKnownNonNegative(S->getOperand()));
+ return Res;
}
Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
diff --git a/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll b/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll
index bda7cf82eff97a8..9c3713bdd5db0a8 100644
--- a/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll
+++ b/llvm/test/Transforms/IRCE/iv-plus-offset-range-check.ll
@@ -1088,7 +1088,7 @@ define i8 @test_overflow_check_runtime(i8 %limit, ptr %p) {
; CHECK-NEXT: [[SMAX2:%.*]] = call i8 @llvm.smax.i8(i8 [[SMIN]], i8 -1)
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i8 [[SMAX2]], 1
; CHECK-NEXT: [[TMP5:%.*]] = mul i8 [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[N]] to i16
+; CHECK-NEXT: [[TMP6:%.*]] = zext nneg i8 [[N]] to i16
; CHECK-NEXT: [[TMP7:%.*]] = sub i16 124, [[TMP6]]
; CHECK-NEXT: [[SMIN3:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP7]], i16 0)
; CHECK-NEXT: [[TMP8:%.*]] = trunc i16 [[SMIN3]] to i8
diff --git a/llvm/test/Transforms/IRCE/wide_indvar.ll b/llvm/test/Transforms/IRCE/wide_indvar.ll
index 415ed5750dcc496..10eea088af4f216 100644
--- a/llvm/test/Transforms/IRCE/wide_indvar.ll
+++ b/llvm/test/Transforms/IRCE/wide_indvar.ll
@@ -125,7 +125,7 @@ define i32 @test_increasing_slt_slt_wide_non-negative(ptr %n_ptr, ptr %m_ptr) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[N:%.*]] = load i32, ptr [[N_PTR]], align 4, !range [[RNG6:![0-9]+]]
; CHECK-NEXT: [[M:%.*]] = load i64, ptr [[M_PTR]], align 4, !range [[RNG7:![0-9]+]]
-; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.smin.i64(i64 [[M]], i64 [[TMP0]])
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 0, [[EXIT_MAINLOOP_AT]]
; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
@@ -217,7 +217,7 @@ define i32 @test_increasing_slt_slt_wide_general(ptr %n_ptr, ptr %m_ptr) {
; CHECK-NEXT: [[SMAX1:%.*]] = call i64 @llvm.smax.i64(i64 [[SMIN]], i64 -1)
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[SMAX1]], 1
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[SMIN2:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP3]], i64 [[TMP4]])
; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.smax.i64(i64 [[SMIN2]], i64 0)
; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i64 0, [[EXIT_MAINLOOP_AT]]
@@ -309,7 +309,7 @@ define i32 @test_increasing_slt_slt_wide_general_preloop(ptr %n_ptr, ptr %m_ptr)
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[M]], [[SMAX1]]
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[SMAX]], 1
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[SMIN2:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP3]], i64 [[TMP4]])
; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.smax.i64(i64 [[SMIN2]], i64 -1)
; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i64 -1, [[EXIT_PRELOOP_AT]]
@@ -456,7 +456,7 @@ define i32 @test_increasing_slt_slt_wide_multiple_checks(ptr %n_ptr, ptr %m1_ptr
; CHECK-NEXT: [[TMP14:%.*]] = add nsw i64 [[SMAX12]], 1
; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[SMIN13:%.*]] = call i64 @llvm.smin.i64(i64 [[SMIN9]], i64 [[TMP15]])
-; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[SMIN14:%.*]] = call i64 @llvm.smin.i64(i64 [[SMIN13]], i64 [[TMP16]])
; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.smax.i64(i64 [[SMIN14]], i64 0)
; CHECK-NEXT: [[TMP17:%.*]] = icmp slt i64 0, [[EXIT_MAINLOOP_AT]]
@@ -719,7 +719,7 @@ define i32 @test_increasing_ult_ult_wide_non-negative(ptr %n_ptr, ptr %m_ptr) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[N:%.*]] = load i32, ptr [[N_PTR]], align 4, !range [[RNG6]]
; CHECK-NEXT: [[M:%.*]] = load i64, ptr [[M_PTR]], align 4, !range [[RNG7]]
-; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.umin.i64(i64 [[M]], i64 [[TMP0]])
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 0, [[EXIT_MAINLOOP_AT]]
; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
@@ -809,7 +809,7 @@ define i32 @test_increasing_ult_ult_wide_general(ptr %n_ptr, ptr %m_ptr) {
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[SMIN]], i64 -1)
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[SMAX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 0, [[EXIT_MAINLOOP_AT]]
; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
@@ -918,7 +918,7 @@ define i32 @test_increasing_ult_ult_wide_multiple_checks(ptr %n_ptr, ptr %m1_ptr
; CHECK-NEXT: [[TMP10:%.*]] = add nsw i64 [[SMAX7]], 1
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[UMIN8:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN5]], i64 [[TMP11]])
-; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN8]], i64 [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = icmp ult i64 0, [[EXIT_MAINLOOP_AT]]
; CHECK-NEXT: br i1 [[TMP13]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
diff --git a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
index 8819df6d013feed..d912540c95657f3 100644
--- a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
+++ b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
@@ -230,7 +230,7 @@ define void @promote_latch_condition_decrementing_loop_02(ptr %p, ptr %a) {
; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[LEN]], 0
; CHECK-NEXT: br i1 [[ZERO_CHECK]], label [[LOOPEXIT:%.*]], label [[PREHEADER:%.*]]
; CHECK: preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[LEN]] to i64
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loopexit.loopexit:
; CHECK-NEXT: br label [[LOOPEXIT]]
@@ -273,7 +273,7 @@ define void @promote_latch_condition_decrementing_loop_03(ptr %p, ptr %a) {
; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[LEN]], 0
; CHECK-NEXT: br i1 [[ZERO_CHECK]], label [[LOOPEXIT:%.*]], label [[PREHEADER:%.*]]
; CHECK: preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[LEN]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loopexit.loopexit:
diff --git a/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll b/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
index 01eaa46d981e45b..d24f9a4e40e3895 100644
--- a/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
+++ b/llvm/test/Transforms/IndVarSimplify/zext-nuw.ll
@@ -16,7 +16,7 @@ define void @_Z3fn1v() {
; CHECK-NEXT: br label [[DOTPREHEADER4_LR_PH:%.*]]
; CHECK: .preheader4.lr.ph:
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[X4]], -1
-; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = sext i8 [[J_SROA_0_0_COPYLOAD]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP3]], [[TMP4]]
diff --git a/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll b/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
index 7aa7e6e69bcb780..ea2cfe74be2649b 100644
--- a/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll
@@ -17,7 +17,7 @@ define void @test(ptr %ptr) {
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 8
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[LIM_0]], i32 2)
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[UMAX]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SCEVGEP]], i8 0, i64 [[TMP2]], i1 false)
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll
index aba2532706de5c6..25352ee0991bade 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction.ll
@@ -1200,7 +1200,7 @@ define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, ptr %A) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 2)
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i16 [[SMAX]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i16 [[TMP0]] to i32
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i16 [[SMAX]], 5
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
@@ -1278,7 +1278,7 @@ define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, ptr %A) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 2)
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i16 [[SMAX]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i16 [[TMP0]] to i32
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i16 [[SMAX]], 5
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
|
@@ -1293,7 +1293,10 @@ Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { | |||
|
|||
Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { | |||
Value *V = expand(S->getOperand()); | |||
return Builder.CreateZExt(V, S->getType()); | |||
auto *Res = Builder.CreateZExt(V, S->getType()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should add a IsNNeg bool param to IRBuilder::CreateZExt.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed. Once we have a couple examples, a NFC to restructure the API might be called for.
My main point of uncertainty is what we're going to want for the ZExtOrTrunc variants. Still waiting to see what actual idiomatic usage looks like.
There is a test failure in CodeGen/partial_write_in_region_with_loop.ll. (We should really drop polly from the monorepo...) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with the test failure fixed.
zext nneg was recently added to the IR in #67982. Teaching SCEVExpander to emit nneg when possible is valuable since SCEV may have proved non-trivial facts about loop bounds which would otherwise be lost when materializing the value.