diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 5e46659227262..724b932150887 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1299,6 +1299,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
   case VPInstruction::ExtractLastLanePerPart:
   case VPInstruction::ExtractPenultimateElement:
   case VPInstruction::ActiveLaneMask:
+  case VPInstruction::ExplicitVectorLength:
   case VPInstruction::FirstActiveLane:
   case VPInstruction::LastActiveLane:
   case VPInstruction::FirstOrderRecurrenceSplice:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index e8fea6851dae5..7fe40c3e04901 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1843,6 +1843,35 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
   return true;
 }
 
+/// From the definition of llvm.experimental.get.vector.length,
+/// VPInstruction::ExplicitVectorLength(%AVL) = %AVL when %AVL <= VF.
+static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
+                             PredicatedScalarEvolution &PSE) {
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+           vp_depth_first_deep(Plan.getEntry()))) {
+    for (VPRecipeBase &R : *VPBB) {
+      VPValue *AVL;
+      if (!match(&R, m_EVL(m_VPValue(AVL))))
+        continue;
+
+      ScalarEvolution &SE = *PSE.getSE();
+      const SCEV *AVLSCEV = vputils::getSCEVExprForVPValue(AVL, SE);
+      if (isa<SCEVCouldNotCompute>(AVLSCEV))
+        continue;
+      const SCEV *VFSCEV = SE.getElementCount(AVLSCEV->getType(), VF);
+      if (!SE.isKnownPredicate(CmpInst::ICMP_ULE, AVLSCEV, VFSCEV))
+        continue;
+
+      VPValue *Trunc = VPBuilder(&R).createScalarZExtOrTrunc(
+          AVL, Type::getInt32Ty(Plan.getContext()), AVLSCEV->getType(),
+          R.getDebugLoc());
+      R.getVPSingleValue()->replaceAllUsesWith(Trunc);
+      return true;
+    }
+  }
+  return false;
+}
+
 void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
                                          unsigned BestUF,
                                          PredicatedScalarEvolution &PSE) {
@@ -1852,6 +1881,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
   bool MadeChange = tryToReplaceALMWithWideALM(Plan, BestVF, BestUF);
   MadeChange |= simplifyBranchConditionForVFAndUF(Plan, BestVF, BestUF, PSE);
   MadeChange |= optimizeVectorInductionWidthForTCAndVFUF(Plan, BestVF, BestUF);
+  MadeChange |= simplifyKnownEVL(Plan, BestVF, PSE);
 
   if (MadeChange) {
     Plan.setVF(BestVF);
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
index 345f6f632158a..78b54efe7e682 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
@@ -125,12 +125,11 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
 ; CHECK: vector.ph:
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 8, i32 4, i1 true)
-; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP9:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[SRC:%.*]], <vscale x 4 x i1> splat (i1 true), i32 8)
 ; CHECK-NEXT: [[TMP6:%.*]] = shl <vscale x 4 x i8> [[VP_OP_LOAD]], splat (i8 1)
-; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP12:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[DST:%.*]], <vscale x 4 x i1> splat (i1 true), i32 8)
 ; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i8> [[TMP6]], [[VP_OP_LOAD1]]
-; CHECK-NEXT: call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP7]], ptr align 1 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP7]], ptr align 1 [[DST]], <vscale x 4 x i1> splat (i1 true), i32 8)
 ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
@@ -164,12 +163,11 @@ define void @trip16_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
 ; CHECK: vector.ph:
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 16, i32 8, i1 true)
-; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP1:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[SRC:%.*]], <vscale x 8 x i1> splat (i1 true), i32 16)
 ; CHECK-NEXT: [[TMP6:%.*]] = shl <vscale x 8 x i8> [[VP_OP_LOAD]], splat (i8 1)
-; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP4:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[DST:%.*]], <vscale x 8 x i1> splat (i1 true), i32 16)
 ; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 8 x i8> [[TMP6]], [[VP_OP_LOAD1]]
-; CHECK-NEXT: call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP11]], ptr align 1 [[TMP4]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP11]], ptr align 1 [[DST]], <vscale x 8 x i1> splat (i1 true), i32 16)
 ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
@@ -204,12 +202,11 @@ define void @trip32_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
 ; CHECK: vector.ph:
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 32, i32 16, i1 true)
-; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP1:%.*]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[SRC:%.*]], <vscale x 16 x i1> splat (i1 true), i32 32)
 ; CHECK-NEXT: [[TMP6:%.*]] = shl <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 1)
-; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP4:%.*]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[DST:%.*]], <vscale x 16 x i1> splat (i1 true), i32 32)
 ; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 16 x i8> [[TMP6]], [[VP_OP_LOAD1]]
-; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP11]], ptr align 1 [[TMP4]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP11]], ptr align 1 [[DST]], <vscale x 16 x i1> splat (i1 true), i32 32)
 ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
@@ -243,12 +240,11 @@ define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
 ; CHECK: vector.ph:
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 24, i32 16, i1 true)
-; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[SRC:%.*]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[SRC:%.*]], <vscale x 16 x i1> splat (i1 true), i32 24)
 ; CHECK-NEXT: [[TMP6:%.*]] = shl <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 1)
-; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[DST:%.*]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[DST:%.*]], <vscale x 16 x i1> splat (i1 true), i32 24)
 ; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 16 x i8> [[TMP6]], [[VP_OP_LOAD1]]
-; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP7]], ptr align 1 [[DST]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP7]], ptr align 1 [[DST]], <vscale x 16 x i1> splat (i1 true), i32 24)
 ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
index 1676461863583..f1dda3d5a2f91 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
@@ -10,8 +10,7 @@ define void @foo(ptr %arg) #0 {
 ; CHECK: [[VECTOR_PH]]:
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 3, i32 2, i1 true)
-; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ARG]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ARG]], <vscale x 2 x i1> splat (i1 true), i32 3)
 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: br label %[[EXIT:.*]]
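For illustration, simplifyKnownEVL is what turns the removed get.vector.length computations above into the constant trip counts in the updated CHECK lines. A minimal before/after sketch of the IR, assuming an AVL of 8, VF = vscale x 4, and a target with vscale >= 2 (the %src name is illustrative, not taken from the tests):

; Before: the explicit vector length is computed at run time from the AVL.
%evl = call i32 @llvm.experimental.get.vector.length.i64(i64 8, i32 4, i1 true)
%v = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> splat (i1 true), i32 %evl)

; After: SCEV proves 8 <= 4 * vscale, so the EVL folds to the AVL itself.
%v = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> splat (i1 true), i32 8)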