diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 4947d03a2dc66..951bf1ca62fc2 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -2098,6 +2098,45 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
       return (void)mergeInValue(ValueState[II], II,
                                 ValueLatticeElement::getRange(Result));
     }
+    // experimental.get.vector.length(Count, VF, Scalable): the result never
+    // exceeds Count or MaxLanes, where MaxLanes is VF (multiplied by vscale
+    // when Scalable is set), and it equals Count whenever Count <= MaxLanes.
+    if (II->getIntrinsicID() == Intrinsic::experimental_get_vector_length) {
+      Value *CountArg = II->getArgOperand(0);
+      Value *VF = II->getArgOperand(1);
+      bool Scalable = cast<ConstantInt>(II->getArgOperand(2))->isOne();
+
+      // Computation happens in the larger type.
+      unsigned BitWidth = std::max(CountArg->getType()->getScalarSizeInBits(),
+                                   VF->getType()->getScalarSizeInBits());
+
+      ConstantRange Count = getValueState(CountArg)
+                                .asConstantRange(CountArg->getType(), false)
+                                .zextOrTrunc(BitWidth);
+      ConstantRange MaxLanes = getValueState(VF)
+                                   .asConstantRange(VF->getType(), false)
+                                   .zextOrTrunc(BitWidth);
+      if (Scalable)
+        MaxLanes =
+            MaxLanes.multiply(getVScaleRange(II->getFunction(), BitWidth));
+
+      // The result is at most min(Count, MaxLanes). Derive the bound from the
+      // unsigned maxima instead of getUpper(): for a wrapped or full range the
+      // exclusive upper bound is not one past the largest unsigned value.
+      // getNonEmpty() returns the full set if the bound wraps around to zero.
+      APInt MaxResult =
+          APIntOps::umin(Count.getUnsignedMax(), MaxLanes.getUnsignedMax());
+      ConstantRange Result = ConstantRange::getNonEmpty(
+          APInt::getZero(BitWidth), MaxResult + 1);
+
+      // If Count <= MaxLanes, getvectorlength(Count, MaxLanes) = Count
+      if (Count.icmp(CmpInst::ICMP_ULE, MaxLanes))
+        Result = Count;
+
+      Result = Result.zextOrTrunc(II->getType()->getScalarSizeInBits());
+      return (void)mergeInValue(ValueState[II], II,
+                                ValueLatticeElement::getRange(Result));
+    }
 
     if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) {
       // Compute result range for intrinsics supported by ConstantRange.
diff --git a/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll b/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll
new file mode 100644
index 0000000000000..d0741161e729e
--- /dev/null
+++ b/llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll
@@ -0,0 +1,160 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -p sccp -S | FileCheck %s
+
+define i1 @result_le_count() {
+; CHECK-LABEL: define i1 @result_le_count() {
+; CHECK-NEXT:    ret i1 true
+;
+  %x = call i32 @llvm.experimental.get.vector.length(i32 3, i32 4, i1 false)
+  %res = icmp ule i32 %x, 3
+  ret i1 %res
+}
+
+define i1 @result_le_max_lanes(i32 %count) {
+; CHECK-LABEL: define i1 @result_le_max_lanes(
+; CHECK-SAME: i32 [[COUNT:%.*]]) {
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[COUNT]], i32 3, i1 false)
+; CHECK-NEXT:    ret i1 true
+;
+  %x = call i32 @llvm.experimental.get.vector.length(i32 %count, i32 3, i1 false)
+  %res = icmp ule i32 %x, 3
+  ret i1 %res
+}
+
+define i1 @result_le_max_lanes_scalable(i32 %count) vscale_range(2, 4) {
+; CHECK-LABEL: define i1 @result_le_max_lanes_scalable(
+; CHECK-SAME: i32 [[COUNT:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[COUNT]], i32 4, i1 true)
+; CHECK-NEXT:    ret i1 true
+;
+  %x = call i32 @llvm.experimental.get.vector.length(i32 %count, i32 4, i1 true)
+  %res = icmp ule i32 %x, 16
+  ret i1 %res
+}
+
+define i32 @count_le_max_lanes() {
+; CHECK-LABEL: define i32 @count_le_max_lanes() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i32 4
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [4, %entry], [%iv.next, %loop]
+  %x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 false)
+  %iv.next = sub i32 %iv, %x
+  %ec = icmp eq i32 %iv.next, 0
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret i32 %x
+}
+
+; Can't simplify because %iv isn't <= max lanes.
+define i32 @count_not_le_max_lanes() {
+; CHECK-LABEL: define range(i32 0, 5) i32 @count_not_le_max_lanes() {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 6, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[IV]], i32 4, i1 false)
+; CHECK-NEXT:    [[IV_NEXT]] = sub i32 [[IV]], [[X]]
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i32 [[X]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [6, %entry], [%iv.next, %loop]
+  %x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 false)
+  %iv.next = sub i32 %iv, %x
+  %ec = icmp eq i32 %iv.next, 0
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret i32 %x
+}
+
+define i32 @count_le_max_lanes_scalable_known() vscale_range(4, 8) {
+; CHECK-LABEL: define i32 @count_le_max_lanes_scalable_known(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i32 16
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [16, %entry], [%iv.next, %loop]
+  %x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 true)
+  %iv.next = sub i32 %iv, %x
+  %ec = icmp eq i32 %iv.next, 0
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret i32 %x
+}
+
+; Can't simplify because %iv isn't guaranteed <= max lanes.
+define i32 @count_le_max_lanes_scalable_unknown() {
+; CHECK-LABEL: define i32 @count_le_max_lanes_scalable_unknown() {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 16, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[IV]], i32 4, i1 true)
+; CHECK-NEXT:    [[IV_NEXT]] = sub i32 [[IV]], [[X]]
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i32 [[X]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [16, %entry], [%iv.next, %loop]
+  %x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 true)
+  %iv.next = sub i32 %iv, %x
+  %ec = icmp eq i32 %iv.next, 0
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret i32 %x
+}
+
+define i1 @result_le_overflow() {
+; CHECK-LABEL: define i1 @result_le_overflow() {
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 4294967296, i32 4, i1 false)
+; CHECK-NEXT:    [[RES:%.*]] = icmp ule i32 [[X]], 3
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+  %x = call i32 @llvm.experimental.get.vector.length(i64 u0x100000000, i32 4, i1 false)
+  %res = icmp ule i32 %x, 3
+  ret i1 %res
+}
+
+; A wrapped count range (here: any non-zero value) must not shrink the result
+; range below what max lanes allows.
+define i1 @result_le_max_lanes_wrapped_count(i32 range(i32 1, 0) %count) {
+; CHECK-LABEL: define i1 @result_le_max_lanes_wrapped_count(
+; CHECK-SAME: i32 range(i32 1, 0) [[COUNT:%.*]]) {
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[COUNT]], i32 3, i1 false)
+; CHECK-NEXT:    ret i1 true
+;
+  %x = call i32 @llvm.experimental.get.vector.length(i32 %count, i32 3, i1 false)
+  %res = icmp ule i32 %x, 3
+  ret i1 %res
+}