diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b7224a33f47b1..ca85920e44b12 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8232,15 +8232,18 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
       VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
                                *Plan, CM.getMinimalBitwidths());
       VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
-      // TODO: try to put it close to addActiveLaneMask().
-      if (CM.foldTailWithEVL())
-        VPlanTransforms::runPass(VPlanTransforms::addExplicitVectorLength,
-                                 *Plan, CM.getMaxSafeElements());
       assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
       VPlans.push_back(std::move(Plan));
     }
     VF = SubRange.End;
   }
+
+  if (CM.foldTailWithEVL()) {
+    for (auto &Plan : VPlans) {
+      VPlanTransforms::runPass(VPlanTransforms::optimizeMasksToEVL, *Plan);
+      assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
+    }
+  }
 }
 
 VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
@@ -8499,6 +8502,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   }
   VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues, *PSE.getSE());
 
+  if (CM.foldTailWithEVL())
+    VPlanTransforms::addExplicitVectorLength(*Plan, CM.getMaxSafeElements());
+
   assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
   return Plan;
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 48bd697397f41..8df0e9850c743 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -605,9 +605,12 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
                     VPBuilder &Builder) {
   VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
   VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
-  VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV();
-  VPSingleDefRecipe *BaseIV = Builder.createDerivedIV(
-      Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
+  VPHeaderPHIRecipe *IV = LoopRegion->getCanonicalIV();
+  if (auto *EVLIV =
+          dyn_cast<VPEVLBasedIVPHIRecipe>(std::next(IV->getIterator())))
+    IV = EVLIV;
+  VPSingleDefRecipe *BaseIV =
+      Builder.createDerivedIV(Kind, FPBinOp, StartV, IV, Step, "offset.idx");
 
   // Truncate base induction if needed.
   VPTypeAnalysis TypeInfo(Plan);
@@ -2621,8 +2624,42 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
   return nullptr;
 }
 
-/// Replace recipes with their EVL variants.
-static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
+void VPlanTransforms::optimizeMasksToEVL(VPlan &Plan) {
+  // Find the EVL-based header mask if it exists: icmp ult step-vector, EVL
+  VPInstruction *HeaderMask = nullptr;
+  for (VPRecipeBase &R : *Plan.getVectorLoopRegion()->getEntryBasicBlock()) {
+    if (match(&R, m_ICmp(m_VPInstruction<VPInstruction::StepVector>(),
+                         m_EVL(m_VPValue())))) {
+      HeaderMask = cast<VPInstruction>(&R);
+      break;
+    }
+  }
+  if (!HeaderMask)
+    return;
+
+  VPValue *EVL = HeaderMask->getOperand(1);
+
+  VPTypeAnalysis TypeInfo(Plan);
+
+  for (VPUser *U : collectUsersRecursively(HeaderMask)) {
+    VPRecipeBase *R = cast<VPRecipeBase>(U);
+    if (auto *NewR = optimizeMaskToEVL(HeaderMask, *R, TypeInfo, *EVL)) {
+      NewR->insertBefore(R);
+      for (auto [Old, New] :
+           zip_equal(R->definedValues(), NewR->definedValues()))
+        Old->replaceAllUsesWith(New);
+      // Erase dead stores; the rest will be removed by removeDeadRecipes.
+      if (R->getNumDefinedValues() == 0)
+        R->eraseFromParent();
+    }
+  }
+
+  removeDeadRecipes(Plan);
+}
+
+/// After replacing the IV with an EVL-based IV, fix up recipes that use VF to
+/// use the EVL instead to avoid incorrect updates on the penultimate iteration.
+static void fixupVFUsersForEVL(VPlan &Plan, VPValue &EVL) {
   VPTypeAnalysis TypeInfo(Plan);
   VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
   VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
@@ -2650,10 +2687,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
     return isa(U);
   });
 
-  // Defer erasing recipes till the end so that we don't invalidate the
-  // VPTypeAnalysis cache.
-  SmallVector ToErase;
-
   // Create a scalar phi to track the previous EVL if fixed-order recurrence is
   // contained.
   bool ContainsFORs =
@@ -2687,7 +2720,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
           TypeInfo.inferScalarType(R.getVPSingleValue()), R.getDebugLoc());
       VPSplice->insertBefore(&R);
       R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
-      ToErase.push_back(&R);
     }
   }
 }
@@ -2708,43 +2740,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
       CmpInst::ICMP_ULT,
       Builder.createNaryOp(VPInstruction::StepVector, {}, EVLType), &EVL);
   HeaderMask->replaceAllUsesWith(EVLMask);
-  ToErase.push_back(HeaderMask->getDefiningRecipe());
-
-  // Try to optimize header mask recipes away to their EVL variants.
-  // TODO: Split optimizeMaskToEVL out and move into
-  // VPlanTransforms::optimize. transformRecipestoEVLRecipes should be run in
-  // tryToBuildVPlanWithVPRecipes beforehand.
-  for (VPUser *U : collectUsersRecursively(EVLMask)) {
-    auto *CurRecipe = cast(U);
-    VPRecipeBase *EVLRecipe =
-        optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, EVL);
-    if (!EVLRecipe)
-      continue;
-
-    unsigned NumDefVal = EVLRecipe->getNumDefinedValues();
-    assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
-           "New recipe must define the same number of values as the "
-           "original.");
-    EVLRecipe->insertBefore(CurRecipe);
-    if (isa(
-            EVLRecipe)) {
-      for (unsigned I = 0; I < NumDefVal; ++I) {
-        VPValue *CurVPV = CurRecipe->getVPValue(I);
-        CurVPV->replaceAllUsesWith(EVLRecipe->getVPValue(I));
-      }
-    }
-    ToErase.push_back(CurRecipe);
-  }
-  // Remove dead EVL mask.
-  if (EVLMask->getNumUsers() == 0)
-    ToErase.push_back(EVLMask->getDefiningRecipe());
-
-  for (VPRecipeBase *R : reverse(ToErase)) {
-    SmallVector PossiblyDead(R->operands());
-    R->eraseFromParent();
-    for (VPValue *Op : PossiblyDead)
-      recursivelyDeleteDeadRecipes(Op);
-  }
 }
 
 /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
@@ -2842,7 +2837,7 @@ void VPlanTransforms::addExplicitVectorLength(
       DebugLoc::getCompilerGenerated(), "avl.next");
   AVLPhi->addOperand(NextAVL);
 
-  transformRecipestoEVLRecipes(Plan, *VPEVL);
+  fixupVFUsersForEVL(Plan, *VPEVL);
 
   // Replace all uses of VPCanonicalIVPHIRecipe by
   // VPEVLBasedIVPHIRecipe except for the canonical IV increment.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index b28559b620e13..f474f61c5d8d3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -377,6 +377,17 @@ struct VPlanTransforms {
   /// users in the original exit block using the VPIRInstruction wrapping to the
   /// LCSSA phi.
   static void addExitUsersForFirstOrderRecurrences(VPlan &Plan, VFRange &Range);
+
+  /// If the loop is EVL tail folded, try to optimize any recipes that use an
+  /// EVL-based header mask to a VP intrinsic, e.g.:
+  ///
+  ///   %mask = icmp step-vector, EVL
+  ///   %load = load %ptr, %mask
+  ///
+  ///   ->
+  ///
+  ///   %load = vp.load %ptr, EVL
+  static void optimizeMasksToEVL(VPlan &Plan);
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 34754a1ea3992..91ff2ec3f1384 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -317,6 +317,12 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
         break;
       }
     }
+    if (const auto *EVLPhi = dyn_cast<VPEVLBasedIVPHIRecipe>(&R)) {
+      if (!isa<VPCanonicalIVPHIRecipe>(std::prev(EVLPhi->getIterator()))) {
+        errs() << "EVL-based IV is not immediately after canonical IV\n";
+        return false;
+      }
+    }
   }
 
   auto *IRBB = dyn_cast<VPIRBasicBlock>(VPBB);
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
index f25b86d3b20c2..183bebe818f7d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
@@ -361,12 +361,12 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s
 ; CHECK-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[EVL_BASED_IV]], 2
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
 ; CHECK-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP10]] to i64
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul i64 2, [[TMP16]]
 ; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[EVL_BASED_IV]], 2
 ; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
 ; CHECK-NEXT:    [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP10]], 2
 ; CHECK-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP22]], splat (i1 true), i32 [[INTERLEAVE_EVL]])
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
index f88778b991b0b..7ba43ace33888 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
@@ -270,6 +270,7 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = call @llvm.stepvector.nxv2i32()
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -278,7 +279,6 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] =
insertelement poison, i32 [[TMP12]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i32() ; CHECK-NEXT: [[TMP15:%.*]] = icmp ult [[TMP7]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP12]]) @@ -354,6 +354,7 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i32() ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -362,7 +363,6 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP12]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i32() ; CHECK-NEXT: [[TMP15:%.*]] = icmp ult [[TMP7]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP12]]) @@ -576,6 +576,7 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -583,7 +584,6 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP12]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() ; CHECK-NEXT: [[TMP15:%.*]] = icmp ult [[TMP6]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP7]], splat (i1 true), i32 [[TMP12]]) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll index 097f05d222cf6..52f9ef2805bff 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll @@ -5,7 +5,7 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea ; CHECK-LABEL: add ; CHECK: LV(REG): VF = vscale x 4 ; CHECK-NEXT: LV(REG): Found max usage: 2 item -; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers ; CHECK-NEXT: LV(REG): RegisterClass: 
RISCV::VRRC, 4 registers ; CHECK-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll index 8bbfdf39a0624..100c2d123c0ba 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll @@ -6,14 +6,14 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea ; ZVFH-LABEL: add ; ZVFH: LV(REG): VF = vscale x 4 ; ZVFH-NEXT: LV(REG): Found max usage: 2 item -; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers ; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers ; ZVFH-NEXT: LV(REG): Found invariant usage: 1 item ; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers ; ZVFHMIN-LABEL: add ; ZVFHMIN: LV(REG): VF = vscale x 4 ; ZVFHMIN-NEXT: LV(REG): Found max usage: 2 item -; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers ; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers ; ZVFHMIN-NEXT: LV(REG): Found invariant usage: 1 item ; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll index 6bb0d64314d3e..fbe28b3bf2bc4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll @@ -4,7 +4,7 @@ define i32 @dotp(ptr %a, ptr %b) { ; CHECK-REGS-VP: LV(REG): VF = vscale x 16 ; CHECK-REGS-VP-NEXT: LV(REG): Found max usage: 2 item -; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers ; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 24 registers ; CHECK-REGS-VP-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll index 99139da67bb78..591df1abe06d2 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll @@ -31,28 +31,28 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea ; CHECK-LMUL1-LABEL: add ; CHECK-LMUL1: LV(REG): VF = vscale x 2 ; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item -; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers ; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers ; CHECK-LMUL1-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers ; CHECK-LMUL2-LABEL: add ; CHECK-LMUL2: LV(REG): VF = vscale x 4 ; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item -; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers ; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers ; CHECK-LMUL2-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers ; CHECK-LMUL4-LABEL: add ; CHECK-LMUL4: 
LV(REG): VF = vscale x 8 ; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item -; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers ; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers ; CHECK-LMUL4-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers ; CHECK-LMUL8-LABEL: add ; CHECK-LMUL8: LV(REG): VF = vscale x 16 ; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item -; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers ; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers ; CHECK-LMUL8-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers @@ -86,19 +86,19 @@ define void @goo(ptr nocapture noundef %a, i32 noundef signext %n) { ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers ; CHECK-LMUL1: LV(REG): VF = vscale x 2 ; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item -; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers ; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers ; CHECK-LMUL2: LV(REG): VF = vscale x 4 ; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item -; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers ; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers ; CHECK-LMUL4: LV(REG): VF = vscale x 8 ; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item -; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers ; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers ; CHECK-LMUL8: LV(REG): VF = vscale x 16 ; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item -; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers ; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers entry: %cmp3 = icmp sgt i32 %n, 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index f2f65685e9bad..415f8bd8a8795 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -25,8 +25,8 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV64: [[VECTOR_BODY]]: ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV64-NEXT: [[AVL:%.*]] = phi i64 [ 1023, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV64-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; RV64-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; RV64-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP7]] ; RV64-NEXT: [[TMP24:%.*]] = zext i32 [[TMP19]] to i64 @@ -66,8 +66,8 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV32: [[VECTOR_BODY]]: ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, 
%[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV32-NEXT: [[AVL:%.*]] = phi i64 [ 1023, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV32-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; RV32-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; RV32-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV32-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP7]] ; RV32-NEXT: [[TMP10:%.*]] = mul i32 0, [[TMP9]] @@ -208,9 +208,9 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64: [[VECTOR_BODY]]: ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV64-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV64-NEXT: [[TMP20:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; RV64-NEXT: [[DOTCAST3:%.*]] = trunc i64 [[INDEX]] to i32 ; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[N]], [[DOTCAST3]] +; RV64-NEXT: [[TMP20:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; RV64-NEXT: [[TMP21:%.*]] = add nsw i32 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 ; RV64-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP22]] @@ -236,7 +236,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP36]], [[INDEX]] ; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP36]] ; RV64-NEXT: [[TMP37:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; RV64-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; RV64-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; RV64: [[MIDDLE_BLOCK]]: ; RV64-NEXT: br label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]] ; RV64: [[SCALAR_PH]]: @@ -271,9 +271,9 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV32: [[VECTOR_BODY]]: ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV32-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV32-NEXT: [[TMP16:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; RV32-NEXT: [[DOTCAST3:%.*]] = trunc i64 [[INDEX]] to i32 ; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[N]], [[DOTCAST3]] +; RV32-NEXT: [[TMP16:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; RV32-NEXT: [[TMP13:%.*]] = add nsw i32 [[OFFSET_IDX]], -1 ; RV32-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 ; RV32-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP14]] @@ -297,7 +297,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP29]], [[INDEX]] ; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP29]] ; RV32-NEXT: [[TMP30:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; RV32-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; RV32-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; RV32: [[MIDDLE_BLOCK]]: ; RV32-NEXT: br label 
%[[FOR_COND_CLEANUP_LOOPEXIT:.*]] ; RV32: [[SCALAR_PH]]: @@ -459,9 +459,9 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64: [[VECTOR_BODY]]: ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV64-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV64-NEXT: [[TMP20:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; RV64-NEXT: [[DOTCAST3:%.*]] = trunc i64 [[INDEX]] to i32 ; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[N]], [[DOTCAST3]] +; RV64-NEXT: [[TMP20:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; RV64-NEXT: [[TMP21:%.*]] = add nsw i32 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 ; RV64-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP22]] @@ -487,7 +487,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP36]], [[INDEX]] ; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP36]] ; RV64-NEXT: [[TMP37:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; RV64-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; RV64-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; RV64: [[MIDDLE_BLOCK]]: ; RV64-NEXT: br label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]] ; RV64: [[SCALAR_PH]]: @@ -522,9 +522,9 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV32: [[VECTOR_BODY]]: ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV32-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV32-NEXT: [[TMP16:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; RV32-NEXT: [[DOTCAST3:%.*]] = trunc i64 [[INDEX]] to i32 ; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[N]], [[DOTCAST3]] +; RV32-NEXT: [[TMP16:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; RV32-NEXT: [[TMP13:%.*]] = add nsw i32 [[OFFSET_IDX]], -1 ; RV32-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 ; RV32-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP14]] @@ -548,7 +548,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP29]], [[INDEX]] ; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP29]] ; RV32-NEXT: [[TMP30:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; RV32-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; RV32-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; RV32: [[MIDDLE_BLOCK]]: ; RV32-NEXT: br label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]] ; RV32: [[SCALAR_PH]]: @@ -688,8 +688,8 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) { ; RV64: [[VECTOR_BODY]]: ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV64-NEXT: [[AVL:%.*]] = phi i64 [ 1023, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV64-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; RV64-NEXT: [[TMP19:%.*]] = 
call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; RV64-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP7]] ; RV64-NEXT: [[TMP24:%.*]] = zext i32 [[TMP19]] to i64 @@ -714,7 +714,7 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) { ; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[INDEX]] ; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] ; RV64-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; RV64-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; RV64-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; RV64: [[MIDDLE_BLOCK]]: ; RV64-NEXT: br label %[[EXIT:.*]] ; RV64: [[EXIT]]: @@ -729,8 +729,8 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) { ; RV32: [[VECTOR_BODY]]: ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV32-NEXT: [[AVL:%.*]] = phi i64 [ 1023, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV32-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; RV32-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; RV32-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV32-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP7]] ; RV32-NEXT: [[TMP10:%.*]] = mul i32 0, [[TMP9]] @@ -753,7 +753,7 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) { ; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP23]], [[INDEX]] ; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP23]] ; RV32-NEXT: [[TMP21:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; RV32-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; RV32-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; RV32: [[MIDDLE_BLOCK]]: ; RV32-NEXT: br label %[[EXIT:.*]] ; RV32: [[EXIT]]: @@ -884,7 +884,7 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) { ; RV64-NEXT: store i7 [[TMP28]], ptr [[TMP24]], align 1 ; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; RV64-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020 -; RV64-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; RV64-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; RV64: [[MIDDLE_BLOCK]]: ; RV64-NEXT: br label %[[SCALAR_PH:.*]] ; RV64: [[SCALAR_PH]]: @@ -935,7 +935,7 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) { ; RV32-NEXT: store i7 [[TMP28]], ptr [[TMP24]], align 1 ; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; RV32-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020 -; RV32-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; RV32-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; RV32: [[MIDDLE_BLOCK]]: ; RV32-NEXT: br label %[[SCALAR_PH:.*]] ; RV32: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll 
b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll index 7b0ac78fb365c..2ae61b28b0c15 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll @@ -16,8 +16,8 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL:%.*]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], -1 ; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]] ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64 @@ -133,9 +133,9 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL:%.*]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[OFFSET_IDX3:%.*]] = trunc i64 [[EVL_BASED_IV]] to i32 +; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -1 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[OFFSET_IDX3]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP5]]) @@ -281,8 +281,8 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1025, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) ; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1024, [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) ; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]] ; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll index f804329169fe0..3ece88bc75187 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll @@ -19,8 +19,8 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], 
%[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[SPEC_SELECT]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) ; CHECK-NEXT: [[TMP12:%.*]] = sub nuw nsw i64 1, [[OFFSET_IDX]] ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP11]] to i64 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-riscv-vector-reverse.ll index 46695221c27db..4c834ebf8e8c4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-riscv-vector-reverse.ll @@ -27,10 +27,9 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: EMIT vp<[[INDUCTION:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[INDEX_NEXT:%.+]]> ; CHECK-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%.+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; CHECK-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ vp<[[OTC]]>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] -; CHECK-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[EVL_PHI]]> * ir<-1> -; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<-1>, vp<[[EVL]]> -; CHECK-NEXT: CLONE ir<[[IDX:%.+]]> = add nsw vp<[[SCALAR_STEPS]]>, ir<-1> +; CHECK-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> +; CHECK-NEXT: CLONE ir<[[IDX:%.+]]> = add nsw vp<[[DERIVED_IV]]>, ir<-1> ; CHECK-NEXT: CLONE ir<[[IDX_PROM:%.+]]> = zext ir<[[IDX]]> ; CHECK-NEXT: CLONE ir<[[ARRAY_IDX_B:%.+]]> = getelementptr inbounds ir<[[B:%.+]]>, ir<[[IDX_PROM]]> ; CHECK-NEXT: vp<[[VEC_END_PTR_B:%.+]]> = vector-end-pointer ir<[[ARRAY_IDX_B]]>, vp<[[EVL]]> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll index ab4bb90b55b70..ce0d312cc10b1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll @@ -29,15 +29,12 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%TC>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[PREV_EVL:%.+]]> = phi [ vp<[[VF32]]>, vector.ph ], [ vp<[[EVL:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> -; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1> -; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds nuw ir<%A>, vp<[[ST]] -; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> -; IF-EVL-NEXT: WIDEN ir<[[LD]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> +; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds nuw ir<%A>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: WIDEN ir<[[LD]]> = vp.load ir<[[GEP1]]>, vp<[[EVL]]> ; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SPLICE:%[0-9]+]]> = call llvm.experimental.vp.splice(ir<[[FOR_PHI]]>, ir<[[LD]]>, ir<-1>, ir, vp<[[PREV_EVL]]>, vp<[[EVL]]>) ; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw 
vp<[[SPLICE]]>, ir<[[LD]]> -; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds nuw ir<%B>, vp<[[ST]]> -; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> -; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ADD]]>, vp<[[EVL]]> +; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds nuw ir<%B>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: WIDEN vp.store ir<[[GEP2]]>, ir<[[ADD]]>, vp<[[EVL]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index dff4971ffdfa1..c0c6fd4e0d235 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -42,10 +42,8 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_SELECT:%.+]]> ; IF-EVL-OUTLOOP-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%n>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-OUTLOOP-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> -; IF-EVL-OUTLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]> -; IF-EVL-OUTLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> -; IF-EVL-OUTLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> -; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> +; IF-EVL-OUTLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[EVL_PHI]]> +; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load ir<[[GEP1]]>, vp<[[EVL]]> ; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[ADD:%.+]]> = add ir<[[LD1]]>, ir<[[RDX_PHI]]> ; IF-EVL-OUTLOOP-NEXT: WIDEN-INTRINSIC vp<[[RDX_SELECT]]> = call llvm.vp.merge(ir, ir<[[ADD]]>, ir<[[RDX_PHI]]>, vp<[[EVL]]>) ; IF-EVL-OUTLOOP-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 @@ -72,21 +70,18 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: Live-in ir<%n> = original trip-count ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP: vector.ph: -; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX_START:%.]]> = reduction-start-vector ir<%start>, ir<0>, ir<1> ; IF-EVL-INLOOP-NEXT: Successor(s): vector loop ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: vector loop: { ; IF-EVL-INLOOP-NEXT: vector.body: ; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-INLOOP-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> -; IF-EVL-INLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]> +; IF-EVL-INLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi ir<%start>, ir<[[ADD:%.+]]> ; IF-EVL-INLOOP-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%n>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-INLOOP-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> -; IF-EVL-INLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]> -; IF-EVL-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> -; IF-EVL-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> -; IF-EVL-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> -; 
IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>) +; IF-EVL-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[EVL_PHI]]> +; IF-EVL-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load ir<[[GEP1]]>, vp<[[EVL]]> +; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD]]> = ir<[[RDX_PHI]]> + vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>) ; IF-EVL-INLOOP-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-INLOOP-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll index b3a611eac72fc..48104b817648c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll @@ -26,17 +26,13 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> -; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]> -; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> -; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> -; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> -; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> -; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> -; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]> +; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load ir<[[GEP1]]>, vp<[[EVL]]> +; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load ir<[[GEP2]]>, vp<[[EVL]]> ; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]> -; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> -; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> -; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]> +; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: WIDEN vp.store ir<[[GEP3]]>, ir<[[ADD]]>, vp<[[EVL]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>