[SCEV] Use NUW/NSW flags in ScalarEvolution::getRangeRef() for MulExpr by bababuck · Pull Request #197324 · llvm/llvm-project

bababuck · 2026-05-12T23:06:07Z

When calculating the ConstantRange for a MulExpr SCEV, we were overly conservative because the NSW/NUW flags were not being considered.

llvmorg-github-actions · 2026-05-12T23:06:48Z

@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-analysis

Author: Ryan Buchner (bababuck)

Changes

When calculating the ConstantRange for a MulExpr SCEV, we were overly conservative because the NSW/NUW flags were not being considered.

Patch is 22.11 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/197324.diff

5 Files Affected:

(modified) llvm/lib/Analysis/ScalarEvolution.cpp (+6-1)
(modified) llvm/test/Analysis/ScalarEvolution/abs-intrinsic.ll (+1-1)
(modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll (+15-15)
(modified) llvm/test/CodeGen/AArch64/sinksplat.ll (+6-6)
(modified) llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll (+21-63)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 3d17c2aadefd5..424bf7dd6604e 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6930,9 +6930,14 @@ const ConstantRange &ScalarEvolution::getRangeRef(
   }
   case scMulExpr: {
     const SCEVMulExpr *Mul = cast<SCEVMulExpr>(S);
+    unsigned WrapType = OBO::AnyWrap;
+    if (Mul->hasNoSignedWrap())
+      WrapType |= OBO::NoSignedWrap;
+    if (Mul->hasNoUnsignedWrap())
+      WrapType |= OBO::NoUnsignedWrap;
     ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint, Depth + 1);
     for (const SCEV *Op : drop_begin(Mul->operands()))
-      X = X.multiply(getRangeRef(Op, SignHint, Depth + 1));
+      X = X.multiplyWithNoWrap(getRangeRef(Op, SignHint, Depth + 1), WrapType);
     return setRange(Mul, SignHint,
                     ConservativeResult.intersectWith(X, RangeType));
   }
diff --git a/llvm/test/Analysis/ScalarEvolution/abs-intrinsic.ll b/llvm/test/Analysis/ScalarEvolution/abs-intrinsic.ll
index 38211f7eb079f..317a7feb7a1c1 100644
--- a/llvm/test/Analysis/ScalarEvolution/abs-intrinsic.ll
+++ b/llvm/test/Analysis/ScalarEvolution/abs-intrinsic.ll
@@ -18,7 +18,7 @@ define i32 @abs_nsw(i32 %x) {
 ; CHECK-LABEL: 'abs_nsw'
 ; CHECK-NEXT:  Classifying expressions for: @abs_nsw
 ; CHECK-NEXT:    %r = call i32 @llvm.abs.i32(i32 %x, i1 true)
-; CHECK-NEXT:    --> ((-1 * %x)<nsw> smax %x) U: full-set S: full-set
+; CHECK-NEXT:    --> ((-1 * %x)<nsw> smax %x) U: [-2147483647,-2147483648) S: [-2147483647,-2147483648)
 ; CHECK-NEXT:  Determining loop execution counts for: @abs_nsw
 ;
   %r = call i32 @llvm.abs.i32(i32 %x, i1 1)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
index 480f5cca7ad7b..a58c09912a3d2 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
@@ -14,19 +14,19 @@ target triple = "aarch64"
 define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
 ; CHECK-LABEL: complex_mul_v2f64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    mov w9, #100 // =0x64
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    udiv x9, x9, x8
 ; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    neg x9, x8
-; CHECK-NEXT:    mov w10, #100 // =0x64
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    and x9, x9, x10
 ; CHECK-NEXT:    rdvl x10, #2
+; CHECK-NEXT:    mneg x9, x9, x8
 ; CHECK-NEXT:  .LBB0_1: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldr z2, [x0, #1, mul vl]
 ; CHECK-NEXT:    ldr z3, [x0]
-; CHECK-NEXT:    subs x9, x9, x8
+; CHECK-NEXT:    adds x9, x9, x8
 ; CHECK-NEXT:    ldr z4, [x1, #1, mul vl]
 ; CHECK-NEXT:    ldr z5, [x1]
 ; CHECK-NEXT:    add x1, x1, x10
@@ -97,21 +97,21 @@ exit.block:                                     ; preds = %vector.body
 define %"class.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) {
 ; CHECK-LABEL: complex_mul_nonzero_init_v2f64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    mov w9, #100 // =0x64
 ; CHECK-NEXT:    fmov d1, #1.00000000
+; CHECK-NEXT:    udiv x9, x9, x8
 ; CHECK-NEXT:    fmov d2, #2.00000000
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    neg x9, x8
-; CHECK-NEXT:    mov w10, #100 // =0x64
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    and x9, x9, x10
 ; CHECK-NEXT:    rdvl x10, #2
 ; CHECK-NEXT:    zip2 z0.d, z2.d, z1.d
 ; CHECK-NEXT:    zip1 z1.d, z2.d, z1.d
+; CHECK-NEXT:    mneg x9, x9, x8
 ; CHECK-NEXT:  .LBB1_1: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldr z2, [x0, #1, mul vl]
 ; CHECK-NEXT:    ldr z3, [x0]
-; CHECK-NEXT:    subs x9, x9, x8
+; CHECK-NEXT:    adds x9, x9, x8
 ; CHECK-NEXT:    ldr z4, [x1, #1, mul vl]
 ; CHECK-NEXT:    ldr z5, [x1]
 ; CHECK-NEXT:    add x1, x1, x10
@@ -178,21 +178,21 @@ exit.block:                                     ; preds = %vector.body
 define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
 ; CHECK-LABEL: complex_mul_v2f64_unrolled:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cntw x8
+; CHECK-NEXT:    mov w9, #1000 // =0x3e8
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    udiv x9, x9, x8
 ; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    cntw x8
 ; CHECK-NEXT:    movi v2.2d, #0000000000000000
 ; CHECK-NEXT:    movi v3.2d, #0000000000000000
-; CHECK-NEXT:    neg x9, x8
-; CHECK-NEXT:    mov w10, #1000 // =0x3e8
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    and x9, x9, x10
 ; CHECK-NEXT:    rdvl x10, #4
+; CHECK-NEXT:    mneg x9, x9, x8
 ; CHECK-NEXT:  .LBB2_1: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldr z4, [x0, #1, mul vl]
 ; CHECK-NEXT:    ldr z5, [x0]
-; CHECK-NEXT:    subs x9, x9, x8
+; CHECK-NEXT:    adds x9, x9, x8
 ; CHECK-NEXT:    ldr z6, [x0, #3, mul vl]
 ; CHECK-NEXT:    ldr z7, [x1, #1, mul vl]
 ; CHECK-NEXT:    ldr z16, [x1]
diff --git a/llvm/test/CodeGen/AArch64/sinksplat.ll b/llvm/test/CodeGen/AArch64/sinksplat.ll
index 5743dc7cce580..48416c83e652f 100644
--- a/llvm/test/CodeGen/AArch64/sinksplat.ll
+++ b/llvm/test/CodeGen/AArch64/sinksplat.ll
@@ -509,17 +509,17 @@ define <vscale x 4 x float> @fmul_scalable(ptr %x, ptr %y) "target-features"="+s
 ; CHECK-LABEL: fmul_scalable:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    sxtw x8, w8
-; CHECK-NEXT:    mov w9, #1 // =0x1
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    lsr x9, x8, #4
+; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    ld1rw { z1.s }, p0/z, [x0]
-; CHECK-NEXT:    lsl x8, x8, #2
+; CHECK-NEXT:    mov w9, w9
 ; CHECK-NEXT:  .LBB15_1: // %l1
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldr z2, [x1]
-; CHECK-NEXT:    subs w9, w9, #1
-; CHECK-NEXT:    add x1, x1, x8
+; CHECK-NEXT:    add x1, x1, x9, lsl #6
+; CHECK-NEXT:    subs w8, w8, #1
 ; CHECK-NEXT:    fmul z2.s, z2.s, z1.s
 ; CHECK-NEXT:    fadd z0.s, z2.s, z0.s
 ; CHECK-NEXT:    b.eq .LBB15_1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
index 11370a93b2ffa..ea6d706a4e732 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
@@ -397,8 +397,6 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
 ; DEFAULT-NEXT:  [[ENTRY:.*:]]
 ; DEFAULT-NEXT:    br label %[[VECTOR_PH:.*]]
 ; DEFAULT:       [[VECTOR_PH]]:
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
 ; DEFAULT-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
 ; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
 ; DEFAULT-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
@@ -407,29 +405,17 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
 ; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
 ; DEFAULT-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 ; DEFAULT-NEXT:    [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
-; DEFAULT-NEXT:    [[TMP12:%.*]] = trunc i64 [[TMP6]] to i8
-; DEFAULT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[TMP12]], i64 0
-; DEFAULT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 ; DEFAULT-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; DEFAULT:       [[VECTOR_BODY]]:
-; DEFAULT-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; DEFAULT-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; DEFAULT-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 16 x i8> [ [[TMP10]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; DEFAULT-NEXT:    [[TMP15:%.*]] = mul <vscale x 16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND]]
-; DEFAULT-NEXT:    [[TMP16:%.*]] = lshr <vscale x 16 x i8> [[VEC_IND]], splat (i8 1)
+; DEFAULT-NEXT:    [[TMP15:%.*]] = mul <vscale x 16 x i8> [[BROADCAST_SPLAT]], [[TMP10]]
+; DEFAULT-NEXT:    [[TMP16:%.*]] = lshr <vscale x 16 x i8> [[TMP10]], splat (i8 1)
 ; DEFAULT-NEXT:    [[TMP17:%.*]] = mul <vscale x 16 x i8> [[TMP16]], [[BROADCAST_SPLAT2]]
 ; DEFAULT-NEXT:    [[TMP18:%.*]] = add <vscale x 16 x i8> [[TMP17]], [[TMP15]]
-; DEFAULT-NEXT:    [[TMP19:%.*]] = lshr <vscale x 16 x i8> [[VEC_IND]], splat (i8 2)
+; DEFAULT-NEXT:    [[TMP19:%.*]] = lshr <vscale x 16 x i8> [[TMP10]], splat (i8 2)
 ; DEFAULT-NEXT:    [[TMP20:%.*]] = mul <vscale x 16 x i8> [[TMP19]], [[BROADCAST_SPLAT4]]
 ; DEFAULT-NEXT:    [[TMP21:%.*]] = add <vscale x 16 x i8> [[TMP18]], [[TMP20]]
-; DEFAULT-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]
-; DEFAULT-NEXT:    call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP21]], ptr align 1 [[TMP22]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
-; DEFAULT-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
-; DEFAULT-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 15)
-; DEFAULT-NEXT:    [[TMP24:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
-; DEFAULT-NEXT:    [[TMP23:%.*]] = xor i1 [[TMP24]], true
-; DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 16 x i8> [[VEC_IND]], [[DOTSPLAT]]
-; DEFAULT-NEXT:    br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; DEFAULT-NEXT:    call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP21]], ptr align 1 [[P]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK_ENTRY]])
+; DEFAULT-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; DEFAULT:       [[MIDDLE_BLOCK]]:
 ; DEFAULT-NEXT:    br label %[[FOR_COND_CLEANUP:.*]]
 ; DEFAULT:       [[FOR_COND_CLEANUP]]:
@@ -440,8 +426,6 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
 ; OPTSIZE-NEXT:  [[ENTRY:.*:]]
 ; OPTSIZE-NEXT:    br label %[[VECTOR_PH:.*]]
 ; OPTSIZE:       [[VECTOR_PH]]:
-; OPTSIZE-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; OPTSIZE-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
 ; OPTSIZE-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
 ; OPTSIZE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
 ; OPTSIZE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
@@ -450,29 +434,17 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
 ; OPTSIZE-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
 ; OPTSIZE-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 ; OPTSIZE-NEXT:    [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
-; OPTSIZE-NEXT:    [[TMP12:%.*]] = trunc i64 [[TMP6]] to i8
-; OPTSIZE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[TMP12]], i64 0
-; OPTSIZE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 ; OPTSIZE-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; OPTSIZE:       [[VECTOR_BODY]]:
-; OPTSIZE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; OPTSIZE-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; OPTSIZE-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 16 x i8> [ [[TMP10]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; OPTSIZE-NEXT:    [[TMP15:%.*]] = mul <vscale x 16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND]]
-; OPTSIZE-NEXT:    [[TMP16:%.*]] = lshr <vscale x 16 x i8> [[VEC_IND]], splat (i8 1)
+; OPTSIZE-NEXT:    [[TMP15:%.*]] = mul <vscale x 16 x i8> [[BROADCAST_SPLAT]], [[TMP10]]
+; OPTSIZE-NEXT:    [[TMP16:%.*]] = lshr <vscale x 16 x i8> [[TMP10]], splat (i8 1)
 ; OPTSIZE-NEXT:    [[TMP17:%.*]] = mul <vscale x 16 x i8> [[TMP16]], [[BROADCAST_SPLAT2]]
 ; OPTSIZE-NEXT:    [[TMP18:%.*]] = add <vscale x 16 x i8> [[TMP17]], [[TMP15]]
-; OPTSIZE-NEXT:    [[TMP19:%.*]] = lshr <vscale x 16 x i8> [[VEC_IND]], splat (i8 2)
+; OPTSIZE-NEXT:    [[TMP19:%.*]] = lshr <vscale x 16 x i8> [[TMP10]], splat (i8 2)
 ; OPTSIZE-NEXT:    [[TMP20:%.*]] = mul <vscale x 16 x i8> [[TMP19]], [[BROADCAST_SPLAT4]]
 ; OPTSIZE-NEXT:    [[TMP21:%.*]] = add <vscale x 16 x i8> [[TMP18]], [[TMP20]]
-; OPTSIZE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]
-; OPTSIZE-NEXT:    call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP21]], ptr align 1 [[TMP22]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
-; OPTSIZE-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
-; OPTSIZE-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 15)
-; OPTSIZE-NEXT:    [[TMP24:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
-; OPTSIZE-NEXT:    [[TMP23:%.*]] = xor i1 [[TMP24]], true
-; OPTSIZE-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 16 x i8> [[VEC_IND]], [[DOTSPLAT]]
-; OPTSIZE-NEXT:    br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; OPTSIZE-NEXT:    call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP21]], ptr align 1 [[P]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK_ENTRY]])
+; OPTSIZE-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; OPTSIZE:       [[MIDDLE_BLOCK]]:
 ; OPTSIZE-NEXT:    br label %[[FOR_COND_CLEANUP:.*]]
 ; OPTSIZE:       [[FOR_COND_CLEANUP]]:
@@ -483,8 +455,6 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
 ; MINSIZE-NEXT:  [[ENTRY:.*:]]
 ; MINSIZE-NEXT:    br label %[[VECTOR_PH:.*]]
 ; MINSIZE:       [[VECTOR_PH]]:
-; MINSIZE-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; MINSIZE-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
 ; MINSIZE-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
 ; MINSIZE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
 ; MINSIZE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
@@ -493,29 +463,17 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
 ; MINSIZE-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
 ; MINSIZE-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 ; MINSIZE-NEXT:    [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
-; MINSIZE-NEXT:    [[TMP12:%.*]] = trunc i64 [[TMP6]] to i8
-; MINSIZE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[TMP12]], i64 0
-; MINSIZE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 ; MINSIZE-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; MINSIZE:       [[VECTOR_BODY]]:
-; MINSIZE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; MINSIZE-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; MINSIZE-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 16 x i8> [ [[TMP10]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; MINSIZE-NEXT:    [[TMP15:%.*]] = mul <vscale x 16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND]]
-; MINSIZE-NEXT:    [[TMP16:%.*]] = lshr <vscale x 16 x i8> [[VEC_IND]], splat (i8 1)
+; MINSIZE-NEXT:    [[TMP15:%.*]] = mul <vscale x 16 x i8> [[BROADCAST_SPLAT]], [[TMP10]]
+; MINSIZE-NEXT:    [[TMP16:%.*]] = lshr <vscale x 16 x i8> [[TMP10]], splat (i8 1)
 ; MINSIZE-NEXT:    [[TMP17:%.*]] = mul <vscale x 16 x i8> [[TMP16]], [[BROADCAST_SPLAT2]]
 ; MINSIZE-NEXT:    [[TMP18:%.*]] = add <vscale x 16 x i8> [[TMP17]], [[TMP15]]
-; MINSIZE-NEXT:    [[TMP19:%.*]] = lshr <vscale x 16 x i8> [[VEC_IND]], splat (i8 2)
+; MINSIZE-NEXT:    [[TMP19:%.*]] = lshr <vscale x 16 x i8> [[TMP10]], splat (i8 2)
 ; MINSIZE-NEXT:    [[TMP20:%.*]] = mul <vscale x 16 x i8> [[TMP19]], [[BROADCAST_SPLAT4]]
 ; MINSIZE-NEXT:    [[TMP21:%.*]] = add <vscale x 16 x i8> [[TMP18]], [[TMP20]]
-; MINSIZE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]
-; MINSIZE-NEXT:    call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP21]], ptr align 1 [[TMP22]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
-; MINSIZE-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
-; MINSIZE-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 15)
-; MINSIZE-NEXT:    [[TMP24:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
-; MINSIZE-NEXT:    [[TMP23:%.*]] = xor i1 [[TMP24]], true
-; MINSIZE-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 16 x i8> [[VEC_IND]], [[DOTSPLAT]]
-; MINSIZE-NEXT:    br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; MINSIZE-NEXT:    call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP21]], ptr align 1 [[P]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK_ENTRY]])
+; MINSIZE-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; MINSIZE:       [[MIDDLE_BLOCK]]:
 ; MINSIZE-NEXT:    br label %[[FOR_COND_CLEANUP:.*]]
 ; MINSIZE:       [[FOR_COND_CLEANUP]]:
@@ -579,7 +537,7 @@ define void @dont_vectorize_with_minsize() {
 ; DEFAULT-NEXT:    store <8 x i16> [[TMP15]], ptr [[TMP11]], align 2
 ; DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; DEFAULT-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
-; DEFAULT-NEXT:    br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; DEFAULT-NEXT:    br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; DEFAULT:       [[MIDDLE_BLOCK]]:
 ; DEFAULT-NEXT:    br label %[[FOR_COND_CLEANUP:.*]]
 ; DEFAULT:       [[FOR_COND_CLEANUP]]:
@@ -605,7 +563,7 @@ define void @dont_vectorize_with_minsize() {
 ; OPTSIZE-NEXT:    store <8 x i16> [[TMP9]], ptr [[TMP6]], align 2
 ; OPTSIZE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; OPTSIZE-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
-; OPTSIZE-NEXT:    br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; OPTSIZE-NEXT:    br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; OPTSIZE:       [[MIDDLE_BLOCK]]:
 ; OPTSIZE-NEXT:    br label %[[FOR_COND_CLEANUP:.*]]
 ; OPTSIZE:       [[FOR_COND_CLEANUP]]:
@@ -631,7 +589,7 @@ define void @dont_vectorize_with_minsize() {
 ; MINSIZE-NEXT:    store <2 x i16> [[TMP9]], ptr [[TMP6]], align 2
 ; MINSIZE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; MINSIZE-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
-; MINSIZE-NEXT:    br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; MINSIZE-NEXT:...
[truncated]

bababuck · 2026-05-12T23:07:13Z

+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    mov w9, #100 // =0x64
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    udiv x9, x9, x8
 ; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    cntd x8
-; CHECK-NEXT:    neg x9, x8
-; CHECK-NEXT:    mov w10, #100 // =0x64
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    and x9, x9, x10
 ; CHECK-NEXT:    rdvl x10, #2
+; CHECK-NEXT:    mneg x9, x9, x8
 ; CHECK-NEXT:  .LBB0_1: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldr z2, [x0, #1, mul vl]
 ; CHECK-NEXT:    ldr z3, [x0]
-; CHECK-NEXT:    subs x9, x9, x8
+; CHECK-NEXT:    adds x9, x9, x8


See conversation here.

bababuck · 2026-05-12T23:09:11Z

-; DEFAULT-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4


Prior to this change, we could not prove anything about the range of vscale * %const <nuw> because the NUW flag was not considered when computing the range, even though both %const and vscale have known non-zero ranges.

github-actions · 2026-05-12T23:53:52Z

🐧 Linux x64 Test Results

174216 tests passed
3327 tests skipped
1 test failed

Failed Tests

(click on a test name to see its output)

lldb-api

lldb-api.functionalities/breakpoint/scripted_bkpt/overrides_resolver/TestOverridesResolver.py

Script:
--
/usr/bin/python3 /home/gha/actions-runner/_work/llvm-project/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/gha/actions-runner/_work/llvm-project/llvm-project/build/./lib --env LLVM_INCLUDE_DIR=/home/gha/actions-runner/_work/llvm-project/llvm-project/build/include --env LLVM_TOOLS_DIR=/home/gha/actions-runner/_work/llvm-project/llvm-project/build/./bin --libcxx-include-dir /home/gha/actions-runner/_work/llvm-project/llvm-project/build/include/c++/v1 --libcxx-include-target-dir /home/gha/actions-runner/_work/llvm-project/llvm-project/build/include/x86_64-unknown-linux-gnu/c++/v1 --libcxx-library-dir /home/gha/actions-runner/_work/llvm-project/llvm-project/build/./lib/x86_64-unknown-linux-gnu --triple x86_64-unknown-linux-gnu --build-dir /home/gha/actions-runner/_work/llvm-project/llvm-project/build/lldb-test-build.noindex --lldb-module-cache-dir /home/gha/actions-runner/_work/llvm-project/llvm-project/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/gha/actions-runner/_work/llvm-project/llvm-project/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/gha/actions-runner/_work/llvm-project/llvm-project/build/./bin/lldb --compiler /home/gha/actions-runner/_work/llvm-project/llvm-project/build/./bin/clang --dsymutil /home/gha/actions-runner/_work/llvm-project/llvm-project/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/gha/actions-runner/_work/llvm-project/llvm-project/build/./bin --lldb-obj-root /home/gha/actions-runner/_work/llvm-project/llvm-project/build/tools/lldb --lldb-libs-dir /home/gha/actions-runner/_work/llvm-project/llvm-project/build/./lib --cmake-build-type Release /home/gha/actions-runner/_work/llvm-project/llvm-project/lldb/test/API/functionalities/breakpoint/scripted_bkpt/overrides_resolver -p TestOverridesResolver.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 23.0.0git (https://github.com/llvm/llvm-project revision ab7af39cb46e49f174a456ddc72b3d00f701b604)
  clang revision ab7af39cb46e49f174a456ddc72b3d00f701b604
  llvm revision ab7af39cb46e49f174a456ddc72b3d00f701b604
Skipping the following test categories: msvcstl, dsym, pdb, gmodules, debugserver, objc

--
Command Output (stderr):
--
FAIL: LLDB (/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang-x86_64) :: test_overrides_resolver_resolver_cmd (TestOverridesResolver.TestOverridesResolver.test_overrides_resolver_resolver_cmd)
Log Files:
 - /home/gha/actions-runner/_work/llvm-project/llvm-project/build/lldb-test-build.noindex/functionalities/breakpoint/scripted_bkpt/overrides_resolver/TestOverridesResolver/Failure_test_overrides_resolver_resolver_cmd.log
PASS: LLDB (/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang-x86_64) :: test_overrides_resolver_resolver_python (TestOverridesResolver.TestOverridesResolver.test_overrides_resolver_resolver_python)
======================================================================
FAIL: test_overrides_resolver_resolver_cmd (TestOverridesResolver.TestOverridesResolver.test_overrides_resolver_resolver_cmd)
   Use facade breakpoints to emulate hitting some locations
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/gha/actions-runner/_work/llvm-project/llvm-project/lldb/test/API/functionalities/breakpoint/scripted_bkpt/overrides_resolver/TestOverridesResolver.py", line 25, in test_overrides_resolver_resolver_cmd
    self.do_test(True)
  File "/home/gha/actions-runner/_work/llvm-project/llvm-project/lldb/test/API/functionalities/breakpoint/scripted_bkpt/overrides_resolver/TestOverridesResolver.py", line 72, in do_test
    trivial_id = self.add_override(
                 ^^^^^^^^^^^^^^^^^^
  File "/home/gha/actions-runner/_work/llvm-project/llvm-project/lldb/test/API/functionalities/breakpoint/scripted_bkpt/overrides_resolver/TestOverridesResolver.py", line 61, in add_override
    self.expect("breakpoint override list", substrs=[str(override_id), help_text])
  File "/home/gha/actions-runner/_work/llvm-project/llvm-project/lldb/packages/Python/lldbsuite/test/lldbtest.py", line 2810, in expect
    self.fail(log_msg)
AssertionError: Ran command:
"breakpoint override list"

Got output:
No overrides.

Expecting sub string: "0" (was not found)
Config=x86_64-/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang
----------------------------------------------------------------------
Ran 2 tests in 0.457s

FAILED (failures=1)

--

If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the infrastructure label.

nikic

Compile-time: https://llvm-compile-time-tracker.com/compare.php?from=16e8a3c8faa569dca6f4d10b31b6f16ae57b50c6&to=424dedcc9fdf25818bae78f712f84df9c95dd46b&stat=instructions:u

Looks like multiplyWithNoWrap is pretty expensive. From a quick look, the implementation of multiplies on ConstantRange is generally quite inefficient...

nikic · 2026-05-13T12:21:58Z

Tried some basic optimization: 2172698. This was less helpful than expected: https://llvm-compile-time-tracker.com/compare.php?from=424dedcc9fdf25818bae78f712f84df9c95dd46b&to=1017ca7e4e13216cafe0629a29eb643f1667a80f&stat=instructions:u

It might be that there is some second order impact beyond the range calculation itself.

Edit: I created a PR at #197481.

bababuck · 2026-05-13T17:37:52Z

Tried some basic optimization: 2172698. This was less helpful than expected: https://llvm-compile-time-tracker.com/compare.php?from=424dedcc9fdf25818bae78f712f84df9c95dd46b&to=1017ca7e4e13216cafe0629a29eb643f1667a80f&stat=instructions:u

It might be that there is some second order impact beyond the range calculation itself.

Edit: I created a PR at #197481.

I'll dig further into some of the compile time regressions to see if there is a large codegen change to accompany it that would explain the regression.

[SCEV] Use NUW/NSW flags in ScalarEvolution::getRangeRef() for MulExpr

a3a9b17

bababuck requested a review from efriedma-quic May 12, 2026 23:06

bababuck self-assigned this May 12, 2026

bababuck requested a review from nikic as a code owner May 12, 2026 23:06

bababuck added the llvm:SCEV Scalar Evolution label May 12, 2026

llvmorg-github-actions Bot added backend:AArch64 llvm:analysis Includes value tracking, cost tables and constant folding llvm:transforms labels May 12, 2026

bababuck commented May 12, 2026

View reviewed changes

bababuck mentioned this pull request May 12, 2026

[SCEV] Updates for getUDivExactExpr() to improve handling of MulExpr numerators/denominators #195704

Open

nikic reviewed May 13, 2026

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[SCEV] Use NUW/NSW flags in ScalarEvolution::getRangeRef() for MulExpr#197324

[SCEV] Use NUW/NSW flags in ScalarEvolution::getRangeRef() for MulExpr#197324
bababuck wants to merge 1 commit into
llvm:mainfrom
bababuck:bababuck/getRangeRefMulExprFlags

bababuck commented May 12, 2026

Uh oh!

llvmorg-github-actions Bot commented May 12, 2026 •

edited

Loading

Uh oh!

bababuck May 12, 2026

Uh oh!

bababuck May 12, 2026

Uh oh!

github-actions Bot commented May 12, 2026

Uh oh!

nikic left a comment

Uh oh!

nikic commented May 13, 2026 •

edited

Loading

Uh oh!

bababuck commented May 13, 2026

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

		; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
		; DEFAULT-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4

Conversation

bababuck commented May 12, 2026

Uh oh!

llvmorg-github-actions Bot commented May 12, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

bababuck May 12, 2026

Choose a reason for hiding this comment

Uh oh!

bababuck May 12, 2026

Choose a reason for hiding this comment

Uh oh!

github-actions Bot commented May 12, 2026

🐧 Linux x64 Test Results

Failed Tests

lldb-api

Uh oh!

nikic left a comment

Choose a reason for hiding this comment

Uh oh!

nikic commented May 13, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

bababuck commented May 13, 2026

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

llvmorg-github-actions Bot commented May 12, 2026 •

edited

Loading

nikic commented May 13, 2026 •

edited

Loading