[VPlan] Use nuw when computing {VF,VScale}xUF #170710
@llvm/pr-subscribers-vectorizers
@llvm/pr-subscribers-backend-risc-v

Author: Ramkumar Ramachandra (artagnon)

Changes: These quantities should never wrap.

Patch is 24.79 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/170710.diff

10 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b3ba38f6c630e..2850167f516f9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4571,7 +4571,8 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
   VF.replaceAllUsesWith(RuntimeVF);

   VPValue *UF = Plan.getConstantInt(TCTy, Plan.getUF());
-  VPValue *MulByUF = Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF});
+  VPValue *MulByUF = Builder.createOverflowingOp(Instruction::Mul,
+                                                 {RuntimeVF, UF}, {true, true});
   VFxUF.replaceAllUsesWith(MulByUF);
 }
@@ -4859,7 +4860,8 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
   if (VF.isScalable()) {
     VPValue *VScale = PHBuilder.createElementCount(
         VectorLoop->getCanonicalIVType(), ElementCount::getScalable(1));
-    VPValue *VScaleUF = PHBuilder.createNaryOp(Instruction::Mul, {VScale, UF});
+    VPValue *VScaleUF = PHBuilder.createOverflowingOp(
+        Instruction::Mul, {VScale, UF}, {true, true});
     Inc->setOperand(1, VScaleUF);
     Plan.getVF().replaceAllUsesWith(VScale);
   } else {
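For illustration, here is a hedged sketch of the preheader IR this change produces; the value names and the trip count %N are hypothetical, and the concrete factors assume VF = vscale x 4 and UF = 2, matching the test updates below.

; Sketch only: the second multiply now carries the flags requested via {true, true}.
vector.ph:
  %vscale   = call i64 @llvm.vscale.i64()
  %vf       = mul nuw i64 %vscale, 4        ; RuntimeVF, already nuw before this patch
  %vf.x.uf  = mul nuw nsw i64 %vf, 2        ; VFxUF, newly flagged nuw nsw
  %n.mod.vf = urem i64 %N, %vf.x.uf
  %n.vec    = sub i64 %N, %n.mod.vf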
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
index 131b3d1b02727..35b34fa3e52f3 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
@@ -59,7 +59,7 @@ define void @test_invar_gep(ptr %dst) #0 {
; IC2-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP2]], 4
; IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
; IC2-NEXT: [[TMP21:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; IC2-NEXT: [[TMP3:%.*]] = mul i64 [[TMP11]], 2
+; IC2-NEXT: [[TMP3:%.*]] = mul nuw nsw i64 [[TMP11]], 2
; IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]]
; IC2-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]]
; IC2-NEXT: [[TMP5:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
@@ -172,7 +172,7 @@ define void @test_invar_gep_var_start(i64 %start, ptr %dst) #0 {
; IC2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
; IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP4]], i64 0
; IC2-NEXT: [[TMP9:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; IC2-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; IC2-NEXT: [[TMP5:%.*]] = mul nuw nsw i64 [[TMP4]], 2
; IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP5]]
; IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; IC2-NEXT: [[TMP6:%.*]] = add i64 [[START]], [[N_VEC]]
@@ -294,7 +294,7 @@ define void @test_invar_gep_var_start_step_2(i64 %start, ptr %dst) #0 {
; IC2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
; IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP6]], i64 0
; IC2-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; IC2-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2
+; IC2-NEXT: [[TMP7:%.*]] = mul nuw nsw i64 [[TMP6]], 2
; IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP7]]
; IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; IC2-NEXT: [[TMP10:%.*]] = mul i64 [[N_VEC]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
index 47623f3b5d99e..c128c5988c08d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
@@ -15,7 +15,7 @@ define void @induction_i7(ptr %dst) #0 {
; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw nsw i64 [[TMP40]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i7
@@ -74,7 +74,7 @@ define void @induction_i3_zext(ptr %dst) #0 {
; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw nsw i64 [[TMP40]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i3
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
index 9989209f8c575..ba6bd8f36f5e4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -21,7 +21,7 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 3
-; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP4]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -98,7 +98,7 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 3
-; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP7]], 4
+; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP7]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index f2f65685e9bad..c093958174a5c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -106,7 +106,7 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) {
; RV64-UF2: [[VECTOR_PH]]:
; RV64-UF2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; RV64-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; RV64-UF2-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; RV64-UF2-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP5]], 2
; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 1023, [[TMP6]]
; RV64-UF2-NEXT: [[TMP7:%.*]] = sub i64 1023, [[N_VEC]]
; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 1023, [[TMP7]]
@@ -236,7 +236,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP36]], [[INDEX]]
; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP36]]
; RV64-NEXT: [[TMP37:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; RV64-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; RV64-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; RV64: [[MIDDLE_BLOCK]]:
; RV64-NEXT: br label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]]
; RV64: [[SCALAR_PH]]:
@@ -297,7 +297,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP29]], [[INDEX]]
; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP29]]
; RV32-NEXT: [[TMP30:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; RV32-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; RV32-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; RV32: [[MIDDLE_BLOCK]]:
; RV32-NEXT: br label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]]
; RV32: [[SCALAR_PH]]:
@@ -340,7 +340,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV64-UF2: [[VECTOR_PH]]:
; RV64-UF2-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
; RV64-UF2-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 4
-; RV64-UF2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2
+; RV64-UF2-NEXT: [[TMP19:%.*]] = mul nuw nsw i64 [[TMP18]], 2
; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 [[TMP0]], [[TMP19]]
; RV64-UF2-NEXT: [[TMP20:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
; RV64-UF2-NEXT: [[TMP48:%.*]] = sub i64 [[TMP0]], [[TMP20]]
@@ -487,7 +487,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP36]], [[INDEX]]
; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP36]]
; RV64-NEXT: [[TMP37:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; RV64-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; RV64-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; RV64: [[MIDDLE_BLOCK]]:
; RV64-NEXT: br label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]]
; RV64: [[SCALAR_PH]]:
@@ -548,7 +548,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP29]], [[INDEX]]
; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP29]]
; RV32-NEXT: [[TMP30:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; RV32-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; RV32-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; RV32: [[MIDDLE_BLOCK]]:
; RV32-NEXT: br label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]]
; RV32: [[SCALAR_PH]]:
@@ -591,7 +591,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV64-UF2: [[VECTOR_PH]]:
; RV64-UF2-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
; RV64-UF2-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 4
-; RV64-UF2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2
+; RV64-UF2-NEXT: [[TMP19:%.*]] = mul nuw nsw i64 [[TMP18]], 2
; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 [[TMP0]], [[TMP19]]
; RV64-UF2-NEXT: [[TMP20:%.*]] = sub i64 [[TMP0]], [[N_VEC]]
; RV64-UF2-NEXT: [[TMP48:%.*]] = sub i64 [[TMP0]], [[TMP20]]
@@ -714,7 +714,7 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) {
; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[INDEX]]
; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]]
; RV64-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; RV64-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; RV64-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; RV64: [[MIDDLE_BLOCK]]:
; RV64-NEXT: br label %[[EXIT:.*]]
; RV64: [[EXIT]]:
@@ -753,7 +753,7 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) {
; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP23]], [[INDEX]]
; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP23]]
; RV32-NEXT: [[TMP21:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; RV32-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; RV32-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; RV32: [[MIDDLE_BLOCK]]:
; RV32-NEXT: br label %[[EXIT:.*]]
; RV32: [[EXIT]]:
@@ -769,7 +769,7 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) {
; RV64-UF2: [[VECTOR_PH]]:
; RV64-UF2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; RV64-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; RV64-UF2-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; RV64-UF2-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP5]], 2
; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 1023, [[TMP6]]
; RV64-UF2-NEXT: [[TMP7:%.*]] = sub i64 1023, [[N_VEC]]
; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 1023, [[TMP7]]
@@ -884,7 +884,7 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) {
; RV64-NEXT: store i7 [[TMP28]], ptr [[TMP24]], align 1
; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; RV64-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020
-; RV64-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; RV64-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; RV64: [[MIDDLE_BLOCK]]:
; RV64-NEXT: br label %[[SCALAR_PH:.*]]
; RV64: [[SCALAR_PH]]:
@@ -935,7 +935,7 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) {
; RV32-NEXT: store i7 [[TMP28]], ptr [[TMP24]], align 1
; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; RV32-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020
-; RV32-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; RV32-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; RV32: [[MIDDLE_BLOCK]]:
; RV32-NEXT: br label %[[SCALAR_PH:.*]]
; RV32: [[SCALAR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 414e5d9295554..d6d1dd13118f8 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -46,7 +46,7 @@ define void @single_constant_stride_int_scaled(ptr %p) {
; CHECK-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-UF2-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; CHECK-UF2-NEXT: [[TMP4:%.*]] = mul nuw nsw i64 [[TMP3]], 2
; CHECK-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
; CHECK-UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-UF2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 [[N_MOD_VF]]
@@ -150,7 +150,7 @@ define void @single_constant_stride_int_iv(ptr %p) {
; CHECK-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-UF2-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; CHECK-UF2-NEXT: [[TMP4:%.*]] = mul nuw nsw i64 [[TMP3]], 2
; CHECK-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
; CHECK-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-UF2-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], 64
@@ -254,7 +254,7 @@ define void @single_constant_stride_ptr_iv(ptr %p) {
; CHECK-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
; CHECK-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-UF2-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; CHECK-UF2-NEXT: [[TMP4:%.*]] = mul nuw nsw i64 [[TMP3]], 2
; CHECK-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
; CHECK-UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-UF2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 [[N_MOD_VF]]
@@ -876,7 +876,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-UF2-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], 4
; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP29]], i64 0
; STRIDED-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; STRIDED-UF2-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 2
+; STRIDED-UF2-NEXT: [[TMP30:%.*]] = mul nuw nsw i64 [[TMP29]], 2
; STRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP30]]
; STRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
@@ -1204,7 +1204,7 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-UF2-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
; STRIDED-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; STRIDED-UF2-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
+; STRIDED-UF2-NEXT: [[TMP9:%.*]] = mul nuw nsw i64 [[TMP8]], 2
; STRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP9]]
; STRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
@@ -1335,7 +1335,7 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) {
; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
; NOSTRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP3]], i64 0
; NOSTRIDED-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; NOSTRIDED-UF2-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; NOSTRIDED-UF2-NEXT: [[TMP4:%.*]] = mul nuw nsw i64 [[TMP3]], 2
; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
@@ -1419,7 +1419,7 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) {
; STRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP3]], i64 0
; STRIDED-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; STRIDED-UF2-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; STRIDED-UF2-NEXT: [[TMP4:%.*]] = mul nuw nsw i64 [[TMP3]], 2
; STRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
; STRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; STRIDED-UF2-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
index d140bc09fe731..8226833e64dd2 100...
[truncated]
@llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

Changes: These quantities should never wrap.

Patch is 24.79 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/170710.diff (same diff as in the comment above).
These quantities should never wrap.
Force-pushed from ac2595b to 7e967a3.
fhahn left a comment:
LGTM, thanks.
Could you please add a reference in the description/commit message to the fact that this matches the behavior if only VFxUF is used (and not VF)? When computing both VF and VFxUF, NUW should hold for each step separately.
These quantities should never unsigned-wrap. This matches the behavior if only VFxUF is used (and not VF): when computing both VF and VFxUF, nuw should hold for each step separately.
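As a hedged IR illustration of that point (hypothetical names; VF = vscale x 4, UF = 2):

; Only VFxUF is used: the product is materialized as a single flagged multiply.
%vscale    = call i64 @llvm.vscale.i64()
%vf.x.uf   = mul nuw i64 %vscale, 8           ; vscale * (4 * 2) in one step

; Both VF and VFxUF are used: nuw holds for each step separately.
%vscale.2  = call i64 @llvm.vscale.i64()
%vf        = mul nuw i64 %vscale.2, 4         ; VF
%vf.x.uf.2 = mul nuw nsw i64 %vf, 2           ; VFxUF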