Skip to content

Conversation

topperc
Copy link
Collaborator

@topperc topperc commented Sep 10, 2025

Stacked on #157735

@llvmbot
Copy link
Member

llvmbot commented Sep 10, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Craig Topper (topperc)

Changes

Stacked on #157735


Full diff: https://github.com/llvm/llvm-project/pull/157777.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/ExpandVectorPredication.cpp (+8)
  • (modified) llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll (+42-1)
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 75e8fe4965025..03abc042e5564 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -581,6 +581,14 @@ bool CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
     replaceOperation(*NewNegOp, VPI);
     return NewNegOp;
   }
+  case Intrinsic::vp_select:
+  case Intrinsic::vp_merge: {
+    assert(maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam());
+    Value *NewSelectOp = Builder.CreateSelect(
+        VPI.getOperand(0), VPI.getOperand(1), VPI.getOperand(2), VPI.getName());
+    replaceOperation(*NewSelectOp, VPI);
+    return NewSelectOp;
+  }
   case Intrinsic::vp_abs:
   case Intrinsic::vp_smax:
   case Intrinsic::vp_smin:
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll
index 0c3a7c681c4d0..8fef808f193a9 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll
@@ -204,8 +204,49 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 ; ALL-CONVERT-NEXT:  %{{.+}} = ashr <8 x i32> %i0, %i1
 ; ALL-CONVERT-NEXT:  %{{.+}} = lshr <8 x i32> %i0, %i1
 ; ALL-CONVERT-NEXT:  %{{.+}} = shl <8 x i32> %i0, %i1
-; ALL-CONVERT:       ret void
+; ALL-CONVERT-NEXT:  [[NINS2:%.+]] = insertelement <8 x i32> poison, i32 %n, i64 0
+; ALL-CONVERT-NEXT:  [[NSPLAT2:%.+]] = shufflevector <8 x i32> [[NINS2]], <8 x i32> poison, <8 x i32> zeroinitializer
+; ALL-CONVERT-NEXT:  [[EVLM2:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT2]]
+; ALL-CONVERT-NEXT:  [[NEWM2:%.+]] = and <8 x i1> [[EVLM2]], %m
+; ALL-CONVERT-NEXT:  %{{.+}} = select <8 x i1> [[NEWM2]], <8 x i32> %i0, <8 x i32> %i1
+; ALL-CONVERT:       %{{.+}} = select <8 x i1> %m, <8 x i32> %i0, <8 x i32> %i1
+; ALL-CONVERT-NEXT:  ret void
 
+; ALL-CONVERT: define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
+; ALL-CONVERT:       %{{.*}} = add <vscale x 4 x i32> %i0, %i1
+; ALL-CONVERT:       %{{.*}} = sub <vscale x 4 x i32> %i0, %i1
+; ALL-CONVERT:       %{{.*}} = mul <vscale x 4 x i32> %i0, %i1
+; ALL-CONVERT:       [[EVLM:%.+]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 %n)
+; ALL-CONVERT:       [[NEWM:%.+]] = and <vscale x 4 x i1> [[EVLM]], %m
+; ALL-CONVERT:       [[SELONE:%.+]] = select <vscale x 4 x i1> [[NEWM]], <vscale x 4 x i32> %i1, <vscale x 4 x i32> splat (i32 1)
+; ALL-CONVERT:       %{{.*}} = sdiv <vscale x 4 x i32> %i0, [[SELONE]]
+; ALL-CONVERT:       [[EVLM2:%.+]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 %n)
+; ALL-CONVERT:       [[NEWM2:%.+]] = and <vscale x 4 x i1> [[EVLM2]], %m
+; ALL-CONVERT:       [[SELONE2:%.+]] = select <vscale x 4 x i1> [[NEWM2]], <vscale x 4 x i32> %i1, <vscale x 4 x i32> splat (i32 1)
+; ALL-CONVERT:       %{{.*}} = srem <vscale x 4 x i32> %i0, [[SELONE2]]
+; ALL-CONVERT:       [[EVLM3:%.+]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 %n)
+; ALL-CONVERT:       [[NEWM3:%.+]] = and <vscale x 4 x i1> [[EVLM3]], %m
+; ALL-CONVERT:       [[SELONE3:%.+]] = select <vscale x 4 x i1> [[NEWM3]], <vscale x 4 x i32> %i1, <vscale x 4 x i32> splat (i32 1)
+; ALL-CONVERT:       %{{.*}} = udiv <vscale x 4 x i32> %i0, [[SELONE3]]
+; ALL-CONVERT:       [[EVLM4:%.+]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 %n)
+; ALL-CONVERT:       [[NEWM4:%.+]] = and <vscale x 4 x i1> [[EVLM4]], %m
+; ALL-CONVERT:       [[SELONE4:%.+]] = select <vscale x 4 x i1> [[NEWM4]], <vscale x 4 x i32> %i1, <vscale x 4 x i32> splat (i32 1)
+; ALL-CONVERT:       %{{.*}} = urem <vscale x 4 x i32> %i0, [[SELONE4]]
+; ALL-CONVERT:       %{{.+}} = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1)
+; ALL-CONVERT:       %{{.+}} = call <vscale x 4 x i32> @llvm.smin.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1)
+; ALL-CONVERT:       %{{.+}} = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1)
+; ALL-CONVERT:       %{{.+}} = call <vscale x 4 x i32> @llvm.umin.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1)
+; ALL-CONVERT:       %{{.*}} = and <vscale x 4 x i32> %i0, %i1
+; ALL-CONVERT:       %{{.*}} = or <vscale x 4 x i32> %i0, %i1
+; ALL-CONVERT:       %{{.*}} = xor <vscale x 4 x i32> %i0, %i1
+; ALL-CONVERT:       %{{.*}} = ashr <vscale x 4 x i32> %i0, %i1
+; ALL-CONVERT:       %{{.*}} = lshr <vscale x 4 x i32> %i0, %i1
+; ALL-CONVERT:       %{{.*}} = shl <vscale x 4 x i32> %i0, %i1
+; ALL-CONVERT:       [[EVLM5:%.+]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 %n)
+; ALL-CONVERT:       [[NEWM5:%.+]] = and <vscale x 4 x i1> [[EVLM5]], %m
+; ALL-CONVERT:       %{{.*}} = select <vscale x 4 x i1> [[NEWM5]], <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1
+; ALL-CONVERT:       %{{.*}} = select <vscale x 4 x i1> %m, <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1
+; ALL-CONVERT-NEXT:  ret void
 
 ; Check that reductions use the correct neutral element for masked-off elements
 ; ALL-CONVERT: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) {

}
case Intrinsic::vp_select:
case Intrinsic::vp_merge: {
assert(maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Subject to the assert, the code here appears correct, but what ensures the second clause holds? For vp.select, we can convert undef elements to either true or false, but for vp.merge we need the pivot semantics if EVL!=VLMAX. How do we know that EVL=VLMAX in this path?

From the test diffs below, it looks like we do handle the pivot, I'm just confused about the code structure. Give me a pointer?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the code here

if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) ||
forces the EVLStrategy to Convert if the intrinsic is to be converted. That causes this code
if (foldEVLIntoMask(VPI)) {
to fold the EVL into the mask.

Copy link
Collaborator

@preames preames left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@topperc topperc merged commit 397e5a4 into llvm:main Sep 10, 2025
9 checks passed
@topperc topperc deleted the pr/expand-merge-select branch September 10, 2025 15:50
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants