From 37e700129992133bbf3f826da3a911a6e77b3dc4 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Fri, 5 Sep 2025 18:55:18 +0800
Subject: [PATCH] [RISCV] Use non-vp select in RISCVGatherScatterLowering. NFCish

Similar to #157070 and #157068, let RISCVVLOptimizer do the work.
---
 .../RISCV/RISCVGatherScatterLowering.cpp    |  5 +-
 .../rvv/fixed-vectors-strided-load-store.ll | 62 ++++++-------------
 .../CodeGen/RISCV/rvv/strided-load-store.ll | 39 ++++--------
 3 files changed, 35 insertions(+), 71 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index 80a48c5ec11fc..52dc53e4545e4 100644
--- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -561,7 +561,7 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II) {
   EVL = Builder.CreateElementCount(
       Builder.getInt32Ty(), cast<VectorType>(DataType)->getElementCount());

-  CallInst *Call;
+  Value *Call;

   if (!StoreVal) {
     Call = Builder.CreateIntrinsic(
@@ -571,8 +571,7 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II) {

     // Merge llvm.masked.gather's passthru
     if (II->getIntrinsicID() == Intrinsic::masked_gather)
-      Call = Builder.CreateIntrinsic(Intrinsic::vp_select, {DataType},
-                                     {Mask, Call, II->getArgOperand(3), EVL});
+      Call = Builder.CreateSelect(Mask, Call, II->getArgOperand(3));
   } else
     Call = Builder.CreateIntrinsic(
         Intrinsic::experimental_vp_strided_store,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll
index 84de566e05dff..4595f90c6217c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll
@@ -17,10 +17,9 @@ define void @gather(ptr noalias nocapture %A, ptr noalias nocapture readonly %B)
 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]]
 ; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> splat (i1 true), i32 32)
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32)
 ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1
-; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
+; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[TMP1]]
 ; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160
@@ -60,7 +59,7 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado
 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]]
 ; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> , i32 32)
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> [[MASKEDOFF:%.*]], i32 32)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = select
<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> [[MASKEDOFF:%.*]] ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] @@ -103,10 +102,9 @@ define void @gather_negative_stride(ptr noalias nocapture %A, ptr noalias nocapt ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 155, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] ; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 -5, <32 x i1> splat (i1 true), i32 32) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 -; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[TMP1]] ; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160 @@ -146,10 +144,9 @@ define void @gather_zero_stride(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] ; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 0, <32 x i1> splat (i1 true), i32 32) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 -; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[TMP1]] ; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160 @@ -195,8 +192,7 @@ define void @scatter(ptr noalias nocapture %A, ptr noalias nocapture readonly %B ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I]], align 1 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] ; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> splat (i1 true), i32 32) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) -; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]] +; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[TMP1]], [[WIDE_LOAD]] ; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> splat (i1 true), i32 32) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160 @@ -238,7 +234,7 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read ; 
CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I]], align 1 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] ; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> , i32 32) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> , <32 x i8> [[TMP1]], <32 x i8> [[MASKEDOFF:%.*]], i32 32) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = select <32 x i1> , <32 x i8> [[TMP1]], <32 x i8> [[MASKEDOFF:%.*]] ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]] ; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> , i32 32) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 @@ -283,10 +279,9 @@ define void @gather_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonl ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> splat (i1 true), i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1 -; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[TMP1]] ; CHECK-NEXT: store <8 x i32> [[I4]], ptr [[I2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 32 @@ -329,10 +324,9 @@ define void @gather_unknown_pow2(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP1]], i64 [[TMP0]], <8 x i1> splat (i1 true), i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP2]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1 -; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[TMP2]] ; CHECK-NEXT: store <8 x i32> [[I4]], ptr [[I2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[STEP]] @@ -427,8 +421,7 @@ define void @scatter_pow2(ptr noalias nocapture %A, ptr noalias nocapture readon ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I]], align 1 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> splat (i1 true), i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) -; 
CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]] +; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[TMP1]], [[WIDE_LOAD]] ; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I4]], ptr [[TMP0]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 32 @@ -478,15 +471,13 @@ define void @struct_gather(ptr noalias nocapture %A, ptr noalias nocapture reado ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]], i32 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[B]], i64 [[VEC_IND_SCALAR1]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> splat (i1 true), i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP2]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP1]], i64 16, <8 x i1> splat (i1 true), i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP3]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 4 ; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr [[I2]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i32>, ptr [[I4]], align 4 -; CHECK-NEXT: [[I6:%.*]] = add nsw <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] -; CHECK-NEXT: [[I7:%.*]] = add nsw <8 x i32> [[WIDE_LOAD10]], [[WIDE_MASKED_GATHER9]] +; CHECK-NEXT: [[I6:%.*]] = add nsw <8 x i32> [[WIDE_LOAD]], [[TMP2]] +; CHECK-NEXT: [[I7:%.*]] = add nsw <8 x i32> [[WIDE_LOAD10]], [[TMP3]] ; CHECK-NEXT: store <8 x i32> [[I6]], ptr [[I2]], align 4 ; CHECK-NEXT: store <8 x i32> [[I7]], ptr [[I4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 @@ -549,35 +540,27 @@ define void @gather_unroll(ptr noalias nocapture %A, ptr noalias nocapture reado ; CHECK-NEXT: [[VEC_IND_SCALAR13:%.*]] = phi i64 [ 3, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR14:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 64, <8 x i1> splat (i1 true), i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]] ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP2]], i64 16, <8 x i1> splat (i1 true), i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER52:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP3]], <8 x i32> undef, i32 8) -; CHECK-NEXT: [[I3:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER52]], [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[I3:%.*]] = add nsw <8 x i32> [[TMP3]], [[TMP1]] ; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I3]], ptr [[TMP2]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR3]] ; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> 
@llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP4]], i64 64, <8 x i1> splat (i1 true), i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER53:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP5]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR5]] ; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP6]], i64 16, <8 x i1> splat (i1 true), i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER54:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP7]], <8 x i32> undef, i32 8) -; CHECK-NEXT: [[I8:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER54]], [[WIDE_MASKED_GATHER53]] +; CHECK-NEXT: [[I8:%.*]] = add nsw <8 x i32> [[TMP7]], [[TMP5]] ; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I8]], ptr [[TMP6]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR7]] ; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP8]], i64 64, <8 x i1> splat (i1 true), i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER55:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP9]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR9]] ; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP10]], i64 16, <8 x i1> splat (i1 true), i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER56:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP11]], <8 x i32> undef, i32 8) -; CHECK-NEXT: [[I13:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER56]], [[WIDE_MASKED_GATHER55]] +; CHECK-NEXT: [[I13:%.*]] = add nsw <8 x i32> [[TMP11]], [[TMP9]] ; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I13]], ptr [[TMP10]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR11]] ; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP12]], i64 64, <8 x i1> splat (i1 true), i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER57:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP13]], <8 x i32> undef, i32 8) ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR13]] ; CHECK-NEXT: [[TMP15:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP14]], i64 16, <8 x i1> splat (i1 true), i32 8) -; CHECK-NEXT: [[WIDE_MASKED_GATHER58:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP15]], <8 x i32> undef, i32 8) -; CHECK-NEXT: [[I18:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER58]], [[WIDE_MASKED_GATHER57]] +; CHECK-NEXT: [[I18:%.*]] = add nsw <8 x i32> [[TMP15]], [[TMP13]] ; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I18]], ptr [[TMP14]], i64 16, <8 x i1> splat (i1 true), i32 8) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 128 @@ -656,13 +639,11 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur ; V-NEXT: [[TMP0:%.*]] = getelementptr ptr, ptr [[ARG1:%.*]], i64 [[I3_SCALAR]] ; V-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[I3_SCALAR1]] ; V-NEXT: [[TMP2:%.*]] = call <2 x ptr> 
@llvm.experimental.vp.strided.load.v2p0.p0.i64(ptr [[TMP0]], i64 40, <2 x i1> splat (i1 true), i32 2) -; V-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.vp.select.v2p0(<2 x i1> splat (i1 true), <2 x ptr> [[TMP2]], <2 x ptr> undef, i32 2) ; V-NEXT: [[TMP3:%.*]] = call <2 x ptr> @llvm.experimental.vp.strided.load.v2p0.p0.i64(ptr [[TMP1]], i64 40, <2 x i1> splat (i1 true), i32 2) -; V-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.vp.select.v2p0(<2 x i1> splat (i1 true), <2 x ptr> [[TMP3]], <2 x ptr> undef, i32 2) ; V-NEXT: [[I11:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], i64 [[I]] -; V-NEXT: store <2 x ptr> [[I9]], ptr [[I11]], align 8 +; V-NEXT: store <2 x ptr> [[TMP2]], ptr [[I11]], align 8 ; V-NEXT: [[I13:%.*]] = getelementptr inbounds ptr, ptr [[I11]], i64 2 -; V-NEXT: store <2 x ptr> [[I10]], ptr [[I13]], align 8 +; V-NEXT: store <2 x ptr> [[TMP3]], ptr [[I13]], align 8 ; V-NEXT: [[I15]] = add nuw i64 [[I]], 4 ; V-NEXT: [[I16_SCALAR]] = add i64 [[I3_SCALAR]], 20 ; V-NEXT: [[I16_SCALAR2]] = add i64 [[I3_SCALAR1]], 20 @@ -823,10 +804,9 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt ; CHECK-NEXT: [[I18:%.*]] = add i64 [[I16]], [[I4]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i64 [[I17_SCALAR]] ; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> splat (i1 true), i32 32) -; CHECK-NEXT: [[I21:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) ; CHECK-NEXT: [[I22:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 [[I18]] ; CHECK-NEXT: [[I24:%.*]] = load <32 x i8>, ptr [[I22]], align 1 -; CHECK-NEXT: [[I25:%.*]] = add <32 x i8> [[I24]], [[I21]] +; CHECK-NEXT: [[I25:%.*]] = add <32 x i8> [[I24]], [[TMP1]] ; CHECK-NEXT: store <32 x i8> [[I25]], ptr [[I22]], align 1 ; CHECK-NEXT: [[I27]] = add nuw i64 [[I16]], 32 ; CHECK-NEXT: [[I28_SCALAR]] = add i64 [[I17_SCALAR]], 160 @@ -932,10 +912,9 @@ define void @gather_no_scalar_remainder(ptr noalias nocapture noundef %arg, ptr ; CHECK-NEXT: [[I6_SCALAR:%.*]] = phi i64 [ 0, [[BB2]] ], [ [[I14_SCALAR:%.*]], [[BB4]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i64 [[I6_SCALAR]] ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr [[TMP0]], i64 5, <16 x i1> splat (i1 true), i32 16) -; CHECK-NEXT: [[I9:%.*]] = call <16 x i8> @llvm.vp.select.v16i8(<16 x i1> splat (i1 true), <16 x i8> [[TMP1]], <16 x i8> undef, i32 16) ; CHECK-NEXT: [[I10:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 [[I5]] ; CHECK-NEXT: [[I11:%.*]] = load <16 x i8>, ptr [[I10]], align 1 -; CHECK-NEXT: [[I12:%.*]] = add <16 x i8> [[I11]], [[I9]] +; CHECK-NEXT: [[I12:%.*]] = add <16 x i8> [[I11]], [[TMP1]] ; CHECK-NEXT: store <16 x i8> [[I12]], ptr [[I10]], align 1 ; CHECK-NEXT: [[I13]] = add nuw i64 [[I5]], 16 ; CHECK-NEXT: [[I14_SCALAR]] = add i64 [[I6_SCALAR]], 80 @@ -975,8 +954,7 @@ define <8 x i8> @broadcast_ptr_base(ptr %a) { ; CHECK-LABEL: @broadcast_ptr_base( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr [[A:%.*]], i64 64, <8 x i1> splat (i1 true), i32 8) -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.vp.select.v8i8(<8 x i1> splat (i1 true), <8 x i8> [[TMP0]], <8 x i8> poison, i32 8) -; CHECK-NEXT: ret <8 x i8> [[TMP1]] +; CHECK-NEXT: ret <8 x i8> [[TMP0]] ; entry: %0 = insertelement <8 x ptr> poison, ptr %a, i64 0 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll index 09f42eed026c2..82531b7c939f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll @@ -18,8 +18,7 @@ define @gather(ptr %a, i32 %len) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 16, splat (i1 true), i32 [[TMP2]]) -; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.vp.select.nxv1i64( splat (i1 true), [[TMP3]], undef, i32 [[TMP2]]) -; CHECK-NEXT: [[ACCUM_NEXT]] = add [[ACCUM]], [[GATHER]] +; CHECK-NEXT: [[ACCUM_NEXT]] = add [[ACCUM]], [[TMP3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]] ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]] @@ -63,8 +62,7 @@ define @gather_disjoint_or(ptr %a, i64 %len) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, splat (i1 true), i32 [[TMP1]]) -; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.vp.select.nxv1i64( splat (i1 true), [[TMP2]], poison, i32 [[TMP1]]) -; CHECK-NEXT: [[ACCUM_NEXT]] = add [[ACCUM]], [[GATHER]] +; CHECK-NEXT: [[ACCUM_NEXT]] = add [[ACCUM]], [[TMP2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[VSCALE]] ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 2 ; CHECK-NEXT: [[EXIT:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[LEN:%.*]] @@ -117,8 +115,7 @@ define @gather_non_invariant_step(ptr %a, ptr %b, i32 %len) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, splat (i1 true), i32 [[TMP1]]) -; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.vp.select.nxv1i64( splat (i1 true), [[TMP3]], undef, i32 [[TMP1]]) -; CHECK-NEXT: [[ACCUM_NEXT]] = add [[ACCUM]], [[GATHER]] +; CHECK-NEXT: [[ACCUM_NEXT]] = add [[ACCUM]], [[TMP3]] ; CHECK-NEXT: [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]] ; CHECK-NEXT: [[STEP:%.*]] = load i64, ptr [[B]], align 8 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]] @@ -166,8 +163,7 @@ define @gather_non_invariant_step_shl(ptr %a, ptr %b, i32 %le ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 64, splat (i1 true), i32 [[TMP1]]) -; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.vp.select.nxv1i64( splat (i1 true), [[TMP3]], undef, i32 [[TMP1]]) -; CHECK-NEXT: [[ACCUM_NEXT]] = add [[ACCUM]], [[GATHER]] +; CHECK-NEXT: [[ACCUM_NEXT]] = add [[ACCUM]], [[TMP3]] ; CHECK-NEXT: [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]] ; CHECK-NEXT: [[STEP:%.*]] = load i64, ptr [[B]], align 8 ; CHECK-NEXT: [[STEP1:%.*]] = shl i64 [[STEP]], 2 @@ -227,8 +223,7 @@ define @gather_splat_op_after_step(ptr %a, ptr %b, i32 %len) ; 
CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3 ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP2]], i64 [[TMP1]], splat (i1 true), i32 [[TMP3]]) -; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.vp.select.nxv1i64( splat (i1 true), [[TMP4]], undef, i32 [[TMP3]]) -; CHECK-NEXT: [[ACCUM_NEXT]] = add [[ACCUM]], [[GATHER]] +; CHECK-NEXT: [[ACCUM_NEXT]] = add [[ACCUM]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]] ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[STEP]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]] @@ -404,8 +399,7 @@ define @gather_loopless(ptr %p, i64 %stride) { ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STRIDE:%.*]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 [[TMP1]], splat (i1 true), i32 [[TMP2]]) -; CHECK-NEXT: [[X:%.*]] = call @llvm.vp.select.nxv1i64( splat (i1 true), [[TMP3]], poison, i32 [[TMP2]]) -; CHECK-NEXT: ret [[X]] +; CHECK-NEXT: ret [[TMP3]] ; %step = call @llvm.stepvector.nxv1i64() %splat.insert = insertelement poison, i64 %stride, i64 0 @@ -426,8 +420,7 @@ define @straightline_offset_add(ptr %p, i64 %offset) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 4, splat (i1 true), i32 [[TMP2]]) -; CHECK-NEXT: [[X:%.*]] = call @llvm.vp.select.nxv1i64( splat (i1 true), [[TMP3]], poison, i32 [[TMP2]]) -; CHECK-NEXT: ret [[X]] +; CHECK-NEXT: ret [[TMP3]] ; %step = call @llvm.stepvector.nxv1i64() %splat.insert = insertelement poison, i64 %offset, i64 0 @@ -448,8 +441,7 @@ define @straightline_offset_disjoint_or_1(ptr %p) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 1 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 8, splat (i1 true), i32 [[TMP2]]) -; CHECK-NEXT: [[X:%.*]] = call @llvm.vp.select.nxv1i64( splat (i1 true), [[TMP3]], poison, i32 [[TMP2]]) -; CHECK-NEXT: ret [[X]] +; CHECK-NEXT: ret [[TMP3]] ; %step = call @llvm.stepvector.nxv1i64() %step.shl = shl %step, splat (i64 1) @@ -471,8 +463,7 @@ define @straightline_offset_disjoint_or(ptr %p, i1 %offset) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 8, splat (i1 true), i32 [[TMP2]]) -; CHECK-NEXT: [[X:%.*]] = call @llvm.vp.select.nxv1i64( splat (i1 true), [[TMP3]], poison, i32 [[TMP2]]) -; CHECK-NEXT: ret [[X]] +; CHECK-NEXT: ret [[TMP3]] ; %step = call @llvm.stepvector.nxv1i64() %step.shl = shl %step, splat (i64 1) @@ -495,8 +486,7 @@ define @straightline_offset_shl(ptr %p) { ; CHECK-LABEL: @straightline_offset_shl( ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 32, splat (i1 true), i32 [[TMP1]]) -; CHECK-NEXT: [[X:%.*]] = call @llvm.vp.select.nxv1i64( splat (i1 true), [[TMP2]], poison, i32 [[TMP1]]) -; CHECK-NEXT: ret [[X]] +; CHECK-NEXT: ret [[TMP2]] 
; %step = call @llvm.stepvector.nxv1i64() %offset = shl %step, splat (i64 3) @@ -540,8 +530,7 @@ define @straightline_offset_shl_nonc(ptr %p, i64 %shift) { ; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 [[TMP2]], splat (i1 true), i32 [[TMP3]]) -; CHECK-NEXT: [[X:%.*]] = call @llvm.vp.select.nxv1i64( splat (i1 true), [[TMP4]], poison, i32 [[TMP3]]) -; CHECK-NEXT: ret [[X]] +; CHECK-NEXT: ret [[TMP4]] ; %step = call @llvm.stepvector.nxv1i64() %splat.insert = insertelement poison, i64 %shift, i64 0 @@ -600,8 +589,7 @@ define @vector_base_scalar_offset(ptr %p, i64 %offset) { ; CHECK-NEXT: [[PTRS2OFFSET:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[PTRS2OFFSET]], i64 8, splat (i1 true), i32 [[TMP1]]) -; CHECK-NEXT: [[X:%.*]] = call @llvm.vp.select.nxv1i64( splat (i1 true), [[TMP2]], poison, i32 [[TMP1]]) -; CHECK-NEXT: ret [[X]] +; CHECK-NEXT: ret [[TMP2]] ; %step = call @llvm.stepvector.nxv1i64() %ptrs1 = getelementptr i64, ptr %p, %step @@ -620,8 +608,7 @@ define @splat_base_scalar_offset(ptr %p, i64 %offset) { ; CHECK-NEXT: [[PTRSOFFSET:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[PTRSOFFSET]], i64 0, splat (i1 true), i32 [[TMP1]]) -; CHECK-NEXT: [[X:%.*]] = call @llvm.vp.select.nxv1i64( splat (i1 true), [[TMP2]], poison, i32 [[TMP1]]) -; CHECK-NEXT: ret [[X]] +; CHECK-NEXT: ret [[TMP2]] ; %head = insertelement poison, ptr %p, i32 0 %splat = shufflevector %head, poison, zeroinitializer
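
Note for readers skimming the diff: the functional change is confined to the masked.gather passthru merge in RISCVGatherScatterLowering.cpp above; the rest of the patch is regenerated test output. Below is a minimal standalone sketch of that lowering pattern, assuming hypothetical names (lowerStridedGather, Passthru) that are not part of the in-tree pass; it illustrates the before/after shape rather than reproducing the actual implementation.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// Sketch: lower a strided masked.gather to a VP strided load, then merge the
// passthru operand with a plain (non-VP) select.
static Value *lowerStridedGather(IRBuilder<> &Builder, Type *DataType,
                                 Value *BasePtr, Value *Stride, Value *Mask,
                                 Value *Passthru) {
  // EVL covers the whole vector here; RISCVVLOptimizer may later shrink the
  // VL of the chain based on how the result is used.
  Value *EVL = Builder.CreateElementCount(
      Builder.getInt32Ty(), cast<VectorType>(DataType)->getElementCount());

  Value *Load = Builder.CreateIntrinsic(
      Intrinsic::experimental_vp_strided_load,
      {DataType, BasePtr->getType(), Stride->getType()},
      {BasePtr, Stride, Mask, EVL});

  // Before this patch the merge was an explicit VP intrinsic carrying EVL:
  //   Builder.CreateIntrinsic(Intrinsic::vp_select, {DataType},
  //                           {Mask, Load, Passthru, EVL});
  // Now it is an ordinary select with no EVL operand.
  return Builder.CreateSelect(Mask, Load, Passthru);
}

Because the plain select does not pin the merge to an explicit vector length, the VL can be derived from the select's users later in the pipeline (the "let RISCVVLOptimizer do the work" of the commit message). That is also why, in the regenerated checks above, the all-true-mask cases drop the merge entirely and the masked cases keep a single select without an EVL argument.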