From 732969a3a328499c349eb7c750a8a520410b5ec6 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 26 Nov 2025 01:09:38 +0800 Subject: [PATCH 1/3] Precommit tests --- .../SCCP/get_vector_length-intrinsic.ll | 148 ++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll diff --git a/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll b/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll new file mode 100644 index 0000000000000..2e2d3ac82106a --- /dev/null +++ b/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll @@ -0,0 +1,148 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -p sccp -S | FileCheck %s + +define i1 @result_le_count() { +; CHECK-LABEL: define i1 @result_le_count() { +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 3, i32 4, i1 false) +; CHECK-NEXT: [[RES:%.*]] = icmp ule i32 [[X]], 3 +; CHECK-NEXT: ret i1 [[RES]] +; + %x = call i32 @llvm.experimental.get.vector.length(i32 3, i32 4, i1 false) + %res = icmp ule i32 %x, 3 + ret i1 %res +} + +define i1 @result_le_max_lanes(i32 %count) { +; CHECK-LABEL: define i1 @result_le_max_lanes( +; CHECK-SAME: i32 [[COUNT:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[COUNT]], i32 3, i1 false) +; CHECK-NEXT: [[RES:%.*]] = icmp ule i32 [[X]], 3 +; CHECK-NEXT: ret i1 [[RES]] +; + %x = call i32 @llvm.experimental.get.vector.length(i32 %count, i32 3, i1 false) + %res = icmp ule i32 %x, 3 + ret i1 %res +} + +define i1 @result_le_max_lanes_scalable(i32 %count) vscale_range(2, 4) { +; CHECK-LABEL: define i1 @result_le_max_lanes_scalable( +; CHECK-SAME: i32 [[COUNT:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[COUNT]], i32 4, i1 true) +; CHECK-NEXT: [[RES:%.*]] = icmp ule i32 [[X]], 16 +; CHECK-NEXT: ret i1 [[RES]] +; + %x = call i32 
@llvm.experimental.get.vector.length(i32 %count, i32 4, i1 true) + %res = icmp ule i32 %x, 16 + ret i1 %res +} + +define i32 @count_le_max_lanes() { +; CHECK-LABEL: define i32 @count_le_max_lanes() { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 4, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[IV]], i32 4, i1 false) +; CHECK-NEXT: [[IV_NEXT]] = sub i32 [[IV]], [[X]] +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i32 [[X]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [4, %entry], [%iv.next, %loop] + %x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 false) + %iv.next = sub i32 %iv, %x + %ec = icmp eq i32 %iv.next, 0 + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %x +} + +; Can't simplify because %iv isn't <= max lanes. 
+define i32 @count_not_le_max_lanes() { +; CHECK-LABEL: define i32 @count_not_le_max_lanes() { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 6, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[IV]], i32 4, i1 false) +; CHECK-NEXT: [[IV_NEXT]] = sub i32 [[IV]], [[X]] +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i32 [[X]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [6, %entry], [%iv.next, %loop] + %x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 false) + %iv.next = sub i32 %iv, %x + %ec = icmp eq i32 %iv.next, 0 + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %x +} + +define i32 @count_le_max_lanes_scalable_known() vscale_range(4, 8) { +; CHECK-LABEL: define i32 @count_le_max_lanes_scalable_known( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 16, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[IV]], i32 4, i1 true) +; CHECK-NEXT: [[IV_NEXT]] = sub i32 [[IV]], [[X]] +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i32 [[X]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [16, %entry], [%iv.next, %loop] + %x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 true) + %iv.next = sub i32 %iv, %x + %ec = icmp eq i32 %iv.next, 0 + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %x +} + +; Can't simplify because %iv isn't guaranteed <= max lanes. 
+define i32 @count_le_max_lanes_scalable_unknown() { +; CHECK-LABEL: define i32 @count_le_max_lanes_scalable_unknown() { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 16, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[IV]], i32 4, i1 true) +; CHECK-NEXT: [[IV_NEXT]] = sub i32 [[IV]], [[X]] +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i32 [[X]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [16, %entry], [%iv.next, %loop] + %x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 true) + %iv.next = sub i32 %iv, %x + %ec = icmp eq i32 %iv.next, 0 + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %x +} From 0fa4cb6a618a02e1d3463e9d3dfae34cde7c29c5 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 26 Nov 2025 01:11:10 +0800 Subject: [PATCH 2/3] [SCCP] Handle llvm.experimental.get.vector.length calls --- llvm/lib/Transforms/Utils/SCCPSolver.cpp | 26 ++++++++++++++ .../SCCP/get_vector_length-intrinsic.ll | 34 ++++++------------- 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index 4947d03a2dc66..7a5cc289fa5c9 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -2098,6 +2098,32 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) { return (void)mergeInValue(ValueState[II], II, ValueLatticeElement::getRange(Result)); } + if (II->getIntrinsicID() == Intrinsic::experimental_get_vector_length) { + unsigned BitWidth = CB.getType()->getScalarSizeInBits(); + Value *CountArg = II->getArgOperand(0); + Value *VF = II->getArgOperand(1); + bool Scalable = cast(II->getArgOperand(2))->isOne(); + ConstantRange Count = 
getValueState(CountArg) + .asConstantRange(CountArg->getType(), false) + .zextOrTrunc(BitWidth); + ConstantRange MaxLanes = + getValueState(VF).asConstantRange(BitWidth, false); + if (Scalable) + MaxLanes = + MaxLanes.multiply(getVScaleRange(II->getFunction(), BitWidth)); + + // The result is never greater than either Count or MaxLanes. + ConstantRange Result( + APInt::getZero(BitWidth), + APIntOps::umin(Count.getUpper(), MaxLanes.getUpper())); + + // If Count <= MaxLanes, getvectorlength(Count, MaxLanes) = Count + if (Count.icmp(CmpInst::ICMP_ULE, MaxLanes)) + Result = Count; + + return (void)mergeInValue(ValueState[II], II, + ValueLatticeElement::getRange(Result)); + } if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) { // Compute result range for intrinsics supported by ConstantRange. diff --git a/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll b/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll index 2e2d3ac82106a..3cb6154447631 100644 --- a/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll +++ b/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll @@ -3,9 +3,7 @@ define i1 @result_le_count() { ; CHECK-LABEL: define i1 @result_le_count() { -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 3, i32 4, i1 false) -; CHECK-NEXT: [[RES:%.*]] = icmp ule i32 [[X]], 3 -; CHECK-NEXT: ret i1 [[RES]] +; CHECK-NEXT: ret i1 true ; %x = call i32 @llvm.experimental.get.vector.length(i32 3, i32 4, i1 false) %res = icmp ule i32 %x, 3 @@ -16,8 +14,7 @@ define i1 @result_le_max_lanes(i32 %count) { ; CHECK-LABEL: define i1 @result_le_max_lanes( ; CHECK-SAME: i32 [[COUNT:%.*]]) { ; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[COUNT]], i32 3, i1 false) -; CHECK-NEXT: [[RES:%.*]] = icmp ule i32 [[X]], 3 -; CHECK-NEXT: ret i1 [[RES]] +; CHECK-NEXT: ret i1 true ; %x = call i32 @llvm.experimental.get.vector.length(i32 %count, i32 3, i1 false) %res = icmp ule i32 %x, 3 @@ -28,8 +25,7 @@ 
define i1 @result_le_max_lanes_scalable(i32 %count) vscale_range(2, 4) { ; CHECK-LABEL: define i1 @result_le_max_lanes_scalable( ; CHECK-SAME: i32 [[COUNT:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[COUNT]], i32 4, i1 true) -; CHECK-NEXT: [[RES:%.*]] = icmp ule i32 [[X]], 16 -; CHECK-NEXT: ret i1 [[RES]] +; CHECK-NEXT: ret i1 true ; %x = call i32 @llvm.experimental.get.vector.length(i32 %count, i32 4, i1 true) %res = icmp ule i32 %x, 16 @@ -38,16 +34,12 @@ define i1 @result_le_max_lanes_scalable(i32 %count) vscale_range(2, 4) { define i32 @count_le_max_lanes() { ; CHECK-LABEL: define i32 @count_le_max_lanes() { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 4, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[IV]], i32 4, i1 false) -; CHECK-NEXT: [[IV_NEXT]] = sub i32 [[IV]], [[X]] -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 4 ; entry: br label %loop @@ -65,7 +57,7 @@ exit: ; Can't simplify because %iv isn't <= max lanes. 
define i32 @count_not_le_max_lanes() { -; CHECK-LABEL: define i32 @count_not_le_max_lanes() { +; CHECK-LABEL: define range(i32 0, 5) i32 @count_not_le_max_lanes() { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: @@ -94,16 +86,12 @@ exit: define i32 @count_le_max_lanes_scalable_known() vscale_range(4, 8) { ; CHECK-LABEL: define i32 @count_le_max_lanes_scalable_known( ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 16, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[IV]], i32 4, i1 true) -; CHECK-NEXT: [[IV_NEXT]] = sub i32 [[IV]], [[X]] -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: ret i32 16 ; entry: br label %loop @@ -121,7 +109,7 @@ exit: ; Can't simplify because %iv isn't guaranteed <= max lanes. 
define i32 @count_le_max_lanes_scalable_unknown() { -; CHECK-LABEL: define i32 @count_le_max_lanes_scalable_unknown() { +; CHECK-LABEL: define range(i32 0, -1) i32 @count_le_max_lanes_scalable_unknown() { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: From 67801c511c2286f2773b5eb5e0ed9e9971f888b3 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 26 Nov 2025 02:16:07 +0800 Subject: [PATCH 3/3] Use larger of the two bitwidths, matches how SelectionDAGBuilder expands --- llvm/lib/Transforms/Utils/SCCPSolver.cpp | 12 +++++++++--- .../Transforms/SCCP/get_vector_length-intrinsic.ll | 11 +++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index 7a5cc289fa5c9..951bf1ca62fc2 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -2099,15 +2099,20 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) { ValueLatticeElement::getRange(Result)); } if (II->getIntrinsicID() == Intrinsic::experimental_get_vector_length) { - unsigned BitWidth = CB.getType()->getScalarSizeInBits(); Value *CountArg = II->getArgOperand(0); Value *VF = II->getArgOperand(1); bool Scalable = cast(II->getArgOperand(2))->isOne(); + + // Computation happens in the larger type. 
+ unsigned BitWidth = std::max(CountArg->getType()->getScalarSizeInBits(), + VF->getType()->getScalarSizeInBits()); + ConstantRange Count = getValueState(CountArg) .asConstantRange(CountArg->getType(), false) .zextOrTrunc(BitWidth); - ConstantRange MaxLanes = - getValueState(VF).asConstantRange(BitWidth, false); + ConstantRange MaxLanes = getValueState(VF) + .asConstantRange(VF->getType(), false) + .zextOrTrunc(BitWidth); if (Scalable) MaxLanes = MaxLanes.multiply(getVScaleRange(II->getFunction(), BitWidth)); @@ -2121,6 +2126,7 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) { if (Count.icmp(CmpInst::ICMP_ULE, MaxLanes)) Result = Count; + Result = Result.zextOrTrunc(II->getType()->getScalarSizeInBits()); return (void)mergeInValue(ValueState[II], II, ValueLatticeElement::getRange(Result)); } diff --git a/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll b/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll index 3cb6154447631..d0741161e729e 100644 --- a/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll +++ b/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll @@ -134,3 +134,14 @@ loop: exit: ret i32 %x } + +define i1 @result_le_overflow() { +; CHECK-LABEL: define i1 @result_le_overflow() { +; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 4294967296, i32 4, i1 false) +; CHECK-NEXT: [[RES:%.*]] = icmp ule i32 [[X]], 3 +; CHECK-NEXT: ret i1 [[RES]] +; + %x = call i32 @llvm.experimental.get.vector.length(i64 u0x100000000, i32 4, i1 false) + %res = icmp ule i32 %x, 3 + ret i1 %res +}