[SCEV] Use NUW/NSW flags in ScalarEvolution::getRangeRef() for MulExpr #197324
[SCEV] Use NUW/NSW flags in ScalarEvolution::getRangeRef() for MulExpr #197324 — bababuck wants to merge 1 commit into
Conversation
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: Ryan Buchner (bababuck). Changes: When calculating the `ConstantRange` for a `MulExpr` SCEV, we were overly conservative because the NSW/NUW flags were not being considered. Patch is 22.11 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/197324.diff — 5 Files Affected:
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 3d17c2aadefd5..424bf7dd6604e 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6930,9 +6930,14 @@ const ConstantRange &ScalarEvolution::getRangeRef(
}
case scMulExpr: {
const SCEVMulExpr *Mul = cast<SCEVMulExpr>(S);
+ unsigned WrapType = OBO::AnyWrap;
+ if (Mul->hasNoSignedWrap())
+ WrapType |= OBO::NoSignedWrap;
+ if (Mul->hasNoUnsignedWrap())
+ WrapType |= OBO::NoUnsignedWrap;
ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint, Depth + 1);
for (const SCEV *Op : drop_begin(Mul->operands()))
- X = X.multiply(getRangeRef(Op, SignHint, Depth + 1));
+ X = X.multiplyWithNoWrap(getRangeRef(Op, SignHint, Depth + 1), WrapType);
return setRange(Mul, SignHint,
ConservativeResult.intersectWith(X, RangeType));
}
diff --git a/llvm/test/Analysis/ScalarEvolution/abs-intrinsic.ll b/llvm/test/Analysis/ScalarEvolution/abs-intrinsic.ll
index 38211f7eb079f..317a7feb7a1c1 100644
--- a/llvm/test/Analysis/ScalarEvolution/abs-intrinsic.ll
+++ b/llvm/test/Analysis/ScalarEvolution/abs-intrinsic.ll
@@ -18,7 +18,7 @@ define i32 @abs_nsw(i32 %x) {
; CHECK-LABEL: 'abs_nsw'
; CHECK-NEXT: Classifying expressions for: @abs_nsw
; CHECK-NEXT: %r = call i32 @llvm.abs.i32(i32 %x, i1 true)
-; CHECK-NEXT: --> ((-1 * %x)<nsw> smax %x) U: full-set S: full-set
+; CHECK-NEXT: --> ((-1 * %x)<nsw> smax %x) U: [-2147483647,-2147483648) S: [-2147483647,-2147483648)
; CHECK-NEXT: Determining loop execution counts for: @abs_nsw
;
%r = call i32 @llvm.abs.i32(i32 %x, i1 1)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
index 480f5cca7ad7b..a58c09912a3d2 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
@@ -14,19 +14,19 @@ target triple = "aarch64"
define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cntd x8
+; CHECK-NEXT: mov w9, #100 // =0x64
; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: udiv x9, x9, x8
; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: cntd x8
-; CHECK-NEXT: neg x9, x8
-; CHECK-NEXT: mov w10, #100 // =0x64
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: and x9, x9, x10
; CHECK-NEXT: rdvl x10, #2
+; CHECK-NEXT: mneg x9, x9, x8
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr z2, [x0, #1, mul vl]
; CHECK-NEXT: ldr z3, [x0]
-; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: adds x9, x9, x8
; CHECK-NEXT: ldr z4, [x1, #1, mul vl]
; CHECK-NEXT: ldr z5, [x1]
; CHECK-NEXT: add x1, x1, x10
@@ -97,21 +97,21 @@ exit.block: ; preds = %vector.body
define %"class.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_nonzero_init_v2f64:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cntd x8
+; CHECK-NEXT: mov w9, #100 // =0x64
; CHECK-NEXT: fmov d1, #1.00000000
+; CHECK-NEXT: udiv x9, x9, x8
; CHECK-NEXT: fmov d2, #2.00000000
-; CHECK-NEXT: cntd x8
-; CHECK-NEXT: neg x9, x8
-; CHECK-NEXT: mov w10, #100 // =0x64
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: and x9, x9, x10
; CHECK-NEXT: rdvl x10, #2
; CHECK-NEXT: zip2 z0.d, z2.d, z1.d
; CHECK-NEXT: zip1 z1.d, z2.d, z1.d
+; CHECK-NEXT: mneg x9, x9, x8
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr z2, [x0, #1, mul vl]
; CHECK-NEXT: ldr z3, [x0]
-; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: adds x9, x9, x8
; CHECK-NEXT: ldr z4, [x1, #1, mul vl]
; CHECK-NEXT: ldr z5, [x1]
; CHECK-NEXT: add x1, x1, x10
@@ -178,21 +178,21 @@ exit.block: ; preds = %vector.body
define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64_unrolled:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cntw x8
+; CHECK-NEXT: mov w9, #1000 // =0x3e8
; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: udiv x9, x9, x8
; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: cntw x8
; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEXT: neg x9, x8
-; CHECK-NEXT: mov w10, #1000 // =0x3e8
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: and x9, x9, x10
; CHECK-NEXT: rdvl x10, #4
+; CHECK-NEXT: mneg x9, x9, x8
; CHECK-NEXT: .LBB2_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr z4, [x0, #1, mul vl]
; CHECK-NEXT: ldr z5, [x0]
-; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: adds x9, x9, x8
; CHECK-NEXT: ldr z6, [x0, #3, mul vl]
; CHECK-NEXT: ldr z7, [x1, #1, mul vl]
; CHECK-NEXT: ldr z16, [x1]
diff --git a/llvm/test/CodeGen/AArch64/sinksplat.ll b/llvm/test/CodeGen/AArch64/sinksplat.ll
index 5743dc7cce580..48416c83e652f 100644
--- a/llvm/test/CodeGen/AArch64/sinksplat.ll
+++ b/llvm/test/CodeGen/AArch64/sinksplat.ll
@@ -509,17 +509,17 @@ define <vscale x 4 x float> @fmul_scalable(ptr %x, ptr %y) "target-features"="+s
; CHECK-LABEL: fmul_scalable:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: sxtw x8, w8
-; CHECK-NEXT: mov w9, #1 // =0x1
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: lsr x9, x8, #4
+; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: ld1rw { z1.s }, p0/z, [x0]
-; CHECK-NEXT: lsl x8, x8, #2
+; CHECK-NEXT: mov w9, w9
; CHECK-NEXT: .LBB15_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr z2, [x1]
-; CHECK-NEXT: subs w9, w9, #1
-; CHECK-NEXT: add x1, x1, x8
+; CHECK-NEXT: add x1, x1, x9, lsl #6
+; CHECK-NEXT: subs w8, w8, #1
; CHECK-NEXT: fmul z2.s, z2.s, z1.s
; CHECK-NEXT: fadd z0.s, z2.s, z0.s
; CHECK-NEXT: b.eq .LBB15_1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
index 11370a93b2ffa..ea6d706a4e732 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
@@ -397,8 +397,6 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
; DEFAULT-NEXT: [[ENTRY:.*:]]
; DEFAULT-NEXT: br label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
-; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; DEFAULT-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
@@ -407,29 +405,17 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
-; DEFAULT-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP6]] to i8
-; DEFAULT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[TMP12]], i64 0
-; DEFAULT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
; DEFAULT: [[VECTOR_BODY]]:
-; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; DEFAULT-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i8> [ [[TMP10]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; DEFAULT-NEXT: [[TMP15:%.*]] = mul <vscale x 16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND]]
-; DEFAULT-NEXT: [[TMP16:%.*]] = lshr <vscale x 16 x i8> [[VEC_IND]], splat (i8 1)
+; DEFAULT-NEXT: [[TMP15:%.*]] = mul <vscale x 16 x i8> [[BROADCAST_SPLAT]], [[TMP10]]
+; DEFAULT-NEXT: [[TMP16:%.*]] = lshr <vscale x 16 x i8> [[TMP10]], splat (i8 1)
; DEFAULT-NEXT: [[TMP17:%.*]] = mul <vscale x 16 x i8> [[TMP16]], [[BROADCAST_SPLAT2]]
; DEFAULT-NEXT: [[TMP18:%.*]] = add <vscale x 16 x i8> [[TMP17]], [[TMP15]]
-; DEFAULT-NEXT: [[TMP19:%.*]] = lshr <vscale x 16 x i8> [[VEC_IND]], splat (i8 2)
+; DEFAULT-NEXT: [[TMP19:%.*]] = lshr <vscale x 16 x i8> [[TMP10]], splat (i8 2)
; DEFAULT-NEXT: [[TMP20:%.*]] = mul <vscale x 16 x i8> [[TMP19]], [[BROADCAST_SPLAT4]]
; DEFAULT-NEXT: [[TMP21:%.*]] = add <vscale x 16 x i8> [[TMP18]], [[TMP20]]
-; DEFAULT-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]
-; DEFAULT-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP21]], ptr align 1 [[TMP22]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
-; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
-; DEFAULT-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 15)
-; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
-; DEFAULT-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true
-; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i8> [[VEC_IND]], [[DOTSPLAT]]
-; DEFAULT-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; DEFAULT-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP21]], ptr align 1 [[P]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK_ENTRY]])
+; DEFAULT-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; DEFAULT: [[MIDDLE_BLOCK]]:
; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]]
; DEFAULT: [[FOR_COND_CLEANUP]]:
@@ -440,8 +426,6 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
; OPTSIZE-NEXT: [[ENTRY:.*:]]
; OPTSIZE-NEXT: br label %[[VECTOR_PH:.*]]
; OPTSIZE: [[VECTOR_PH]]:
-; OPTSIZE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; OPTSIZE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; OPTSIZE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
; OPTSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
@@ -450,29 +434,17 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
; OPTSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; OPTSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
-; OPTSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP6]] to i8
-; OPTSIZE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[TMP12]], i64 0
-; OPTSIZE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; OPTSIZE-NEXT: br label %[[VECTOR_BODY:.*]]
; OPTSIZE: [[VECTOR_BODY]]:
-; OPTSIZE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; OPTSIZE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; OPTSIZE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i8> [ [[TMP10]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; OPTSIZE-NEXT: [[TMP15:%.*]] = mul <vscale x 16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND]]
-; OPTSIZE-NEXT: [[TMP16:%.*]] = lshr <vscale x 16 x i8> [[VEC_IND]], splat (i8 1)
+; OPTSIZE-NEXT: [[TMP15:%.*]] = mul <vscale x 16 x i8> [[BROADCAST_SPLAT]], [[TMP10]]
+; OPTSIZE-NEXT: [[TMP16:%.*]] = lshr <vscale x 16 x i8> [[TMP10]], splat (i8 1)
; OPTSIZE-NEXT: [[TMP17:%.*]] = mul <vscale x 16 x i8> [[TMP16]], [[BROADCAST_SPLAT2]]
; OPTSIZE-NEXT: [[TMP18:%.*]] = add <vscale x 16 x i8> [[TMP17]], [[TMP15]]
-; OPTSIZE-NEXT: [[TMP19:%.*]] = lshr <vscale x 16 x i8> [[VEC_IND]], splat (i8 2)
+; OPTSIZE-NEXT: [[TMP19:%.*]] = lshr <vscale x 16 x i8> [[TMP10]], splat (i8 2)
; OPTSIZE-NEXT: [[TMP20:%.*]] = mul <vscale x 16 x i8> [[TMP19]], [[BROADCAST_SPLAT4]]
; OPTSIZE-NEXT: [[TMP21:%.*]] = add <vscale x 16 x i8> [[TMP18]], [[TMP20]]
-; OPTSIZE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]
-; OPTSIZE-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP21]], ptr align 1 [[TMP22]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
-; OPTSIZE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
-; OPTSIZE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 15)
-; OPTSIZE-NEXT: [[TMP24:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
-; OPTSIZE-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true
-; OPTSIZE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i8> [[VEC_IND]], [[DOTSPLAT]]
-; OPTSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; OPTSIZE-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP21]], ptr align 1 [[P]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK_ENTRY]])
+; OPTSIZE-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; OPTSIZE: [[MIDDLE_BLOCK]]:
; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]]
; OPTSIZE: [[FOR_COND_CLEANUP]]:
@@ -483,8 +455,6 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
; MINSIZE-NEXT: [[ENTRY:.*:]]
; MINSIZE-NEXT: br label %[[VECTOR_PH:.*]]
; MINSIZE: [[VECTOR_PH]]:
-; MINSIZE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; MINSIZE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; MINSIZE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
; MINSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
@@ -493,29 +463,17 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
; MINSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; MINSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
-; MINSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP6]] to i8
-; MINSIZE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[TMP12]], i64 0
-; MINSIZE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; MINSIZE-NEXT: br label %[[VECTOR_BODY:.*]]
; MINSIZE: [[VECTOR_BODY]]:
-; MINSIZE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; MINSIZE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; MINSIZE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i8> [ [[TMP10]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; MINSIZE-NEXT: [[TMP15:%.*]] = mul <vscale x 16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND]]
-; MINSIZE-NEXT: [[TMP16:%.*]] = lshr <vscale x 16 x i8> [[VEC_IND]], splat (i8 1)
+; MINSIZE-NEXT: [[TMP15:%.*]] = mul <vscale x 16 x i8> [[BROADCAST_SPLAT]], [[TMP10]]
+; MINSIZE-NEXT: [[TMP16:%.*]] = lshr <vscale x 16 x i8> [[TMP10]], splat (i8 1)
; MINSIZE-NEXT: [[TMP17:%.*]] = mul <vscale x 16 x i8> [[TMP16]], [[BROADCAST_SPLAT2]]
; MINSIZE-NEXT: [[TMP18:%.*]] = add <vscale x 16 x i8> [[TMP17]], [[TMP15]]
-; MINSIZE-NEXT: [[TMP19:%.*]] = lshr <vscale x 16 x i8> [[VEC_IND]], splat (i8 2)
+; MINSIZE-NEXT: [[TMP19:%.*]] = lshr <vscale x 16 x i8> [[TMP10]], splat (i8 2)
; MINSIZE-NEXT: [[TMP20:%.*]] = mul <vscale x 16 x i8> [[TMP19]], [[BROADCAST_SPLAT4]]
; MINSIZE-NEXT: [[TMP21:%.*]] = add <vscale x 16 x i8> [[TMP18]], [[TMP20]]
-; MINSIZE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]
-; MINSIZE-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP21]], ptr align 1 [[TMP22]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
-; MINSIZE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
-; MINSIZE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 15)
-; MINSIZE-NEXT: [[TMP24:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
-; MINSIZE-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true
-; MINSIZE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i8> [[VEC_IND]], [[DOTSPLAT]]
-; MINSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; MINSIZE-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP21]], ptr align 1 [[P]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK_ENTRY]])
+; MINSIZE-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; MINSIZE: [[MIDDLE_BLOCK]]:
; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]]
; MINSIZE: [[FOR_COND_CLEANUP]]:
@@ -579,7 +537,7 @@ define void @dont_vectorize_with_minsize() {
; DEFAULT-NEXT: store <8 x i16> [[TMP15]], ptr [[TMP11]], align 2
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; DEFAULT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
-; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; DEFAULT: [[MIDDLE_BLOCK]]:
; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]]
; DEFAULT: [[FOR_COND_CLEANUP]]:
@@ -605,7 +563,7 @@ define void @dont_vectorize_with_minsize() {
; OPTSIZE-NEXT: store <8 x i16> [[TMP9]], ptr [[TMP6]], align 2
; OPTSIZE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; OPTSIZE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
-; OPTSIZE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; OPTSIZE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; OPTSIZE: [[MIDDLE_BLOCK]]:
; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]]
; OPTSIZE: [[FOR_COND_CLEANUP]]:
@@ -631,7 +589,7 @@ define void @dont_vectorize_with_minsize() {
; MINSIZE-NEXT: store <2 x i16> [[TMP9]], ptr [[TMP6]], align 2
; MINSIZE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; MINSIZE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
-; MINSIZE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; MINSIZE-NEXT:...
[truncated]
|
| ; CHECK-NEXT: cntd x8 | ||
| ; CHECK-NEXT: mov w9, #100 // =0x64 | ||
| ; CHECK-NEXT: movi v0.2d, #0000000000000000 | ||
| ; CHECK-NEXT: udiv x9, x9, x8 | ||
| ; CHECK-NEXT: movi v1.2d, #0000000000000000 | ||
| ; CHECK-NEXT: cntd x8 | ||
| ; CHECK-NEXT: neg x9, x8 | ||
| ; CHECK-NEXT: mov w10, #100 // =0x64 | ||
| ; CHECK-NEXT: ptrue p0.d | ||
| ; CHECK-NEXT: and x9, x9, x10 | ||
| ; CHECK-NEXT: rdvl x10, #2 | ||
| ; CHECK-NEXT: mneg x9, x9, x8 | ||
| ; CHECK-NEXT: .LBB0_1: // %vector.body | ||
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 | ||
| ; CHECK-NEXT: ldr z2, [x0, #1, mul vl] | ||
| ; CHECK-NEXT: ldr z3, [x0] | ||
| ; CHECK-NEXT: subs x9, x9, x8 | ||
| ; CHECK-NEXT: adds x9, x9, x8 |
| ; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() | ||
| ; DEFAULT-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 |
There was a problem hiding this comment.
Prior to this change, we could not prove anything about the range of `(vscale * %const)<nuw>` because the no-unsigned-wrap flag was not considered, even though `%const` and `vscale` both have known non-zero ranges.
🐧 Linux x64 Test Results
Failed Tests (click on a test name to see its output): lldb-api — lldb-api.functionalities/breakpoint/scripted_bkpt/overrides_resolver/TestOverridesResolver.py. If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
nikic
left a comment
There was a problem hiding this comment.
Looks like multiplyWithNoWrap is pretty expensive. From a quick look, the implementation of multiplies on ConstantRange is generally quite inefficient...
|
Tried some basic optimization: 2172698. This was less helpful than expected (see https://llvm-compile-time-tracker.com/compare.php?from=424dedcc9fdf25818bae78f712f84df9c95dd46b&to=1017ca7e4e13216cafe0629a29eb643f1667a80f&stat=instructions:u ). It might be that there is some second-order impact beyond the range calculation itself. Edit: I created a PR at #197481. |
I'll dig further into some of the compile-time regressions to see whether they are accompanied by a large codegen change that would explain them. |
When calculating the `ConstantRange` for a `MulExpr` SCEV, we were overly conservative because the NSW/NUW flags were not being considered.