From 9b92c8dce93a2e5cf503868a26cf8d5092018380 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 18 Oct 2025 21:31:12 +0100 Subject: [PATCH 1/3] [VPlan] Replace ExtractLast(Elem|LanePerPart) with ExtractLast(Lane/Part) Replace ExtractLastElement and ExtractLastLanePerPart with the more fine-grained ExtractLastLane and ExtractLastPart, which separately model extracting the last part of an operand and extracting the last lane of a part. ExtractLastElement == ExtractLastLane(ExtractLastPart) and ExtractLastLanePerPart == ExtractLastLane, the latter clarifying the name of the opcode. A new m_ExtractLastElement matcher is provided for convenience. The patch should be NFC modulo printing changes. --- llvm/lib/Transforms/Vectorize/VPlan.h | 10 ++- .../Transforms/Vectorize/VPlanAnalysis.cpp | 7 ++- .../Transforms/Vectorize/VPlanPatternMatch.h | 20 ++++-- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 27 ++++---- .../Transforms/Vectorize/VPlanTransforms.cpp | 62 +++++++++---------- llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 41 ++++++------ .../LoopVectorize/AArch64/vplan-printing.ll | 6 +- .../RISCV/vplan-vp-intrinsics-reduction.ll | 20 ++++-- .../first-order-recurrence-chains-vplan.ll | 27 +++++--- ...-order-recurrence-sink-replicate-region.ll | 4 +- ...first-order-recurrence-with-uniform-ops.ll | 12 ++-- .../interleave-and-scalarize-only.ll | 3 +- .../vplan-printing-reductions.ll | 26 +++++--- .../LoopVectorize/vplan-printing.ll | 12 ++-- 14 files changed, 161 insertions(+), 116 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 2591df8943752..41dfcce720ad4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1017,12 +1017,10 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, ComputeAnyOfResult, ComputeFindIVResult, ComputeReductionResult, - // Extracts the last lane from its operand if it is a vector, or the last - // part if scalar.
In the latter case, the recipe will be removed during - // unrolling. - ExtractLastElement, - // Extracts the last lane for each part from its operand. - ExtractLastLanePerPart, + // Extracts the last part of its operand. + ExtractLastPart, + // Extracts the last lane of the current part of its operand. + ExtractLastLane, // Extracts the second-to-last lane from its operand or the second-to-last // part if it is scalar. In the latter case, the recipe will be removed // during unrolling. diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 80a2e4bc3f754..b93920d285efb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -116,14 +116,17 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { return inferScalarType(R->getOperand(1)); case VPInstruction::FirstActiveLane: return Type::getIntNTy(Ctx, 64); - case VPInstruction::ExtractLastElement: - case VPInstruction::ExtractLastLanePerPart: + case VPInstruction::ExtractLastLane: case VPInstruction::ExtractPenultimateElement: { Type *BaseTy = inferScalarType(R->getOperand(0)); if (auto *VecTy = dyn_cast(BaseTy)) return VecTy->getElementType(); return BaseTy; } + case VPInstruction::ExtractLastPart: { + // ExtractLastPart returns the same type as its operand + return inferScalarType(R->getOperand(0)); + } case VPInstruction::LogicalAnd: assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) && inferScalarType(R->getOperand(1))->isIntegerTy(1) && diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index b5b98c64543e4..2208ccca92c36 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -383,9 +383,9 @@ m_EVL(const Op0_t &Op0) { } template -inline VPInstruction_match -m_ExtractLastElement(const Op0_t &Op0) { - return m_VPInstruction(Op0); +inline 
VPInstruction_match +m_ExtractLastLane(const Op0_t &Op0) { + return m_VPInstruction(Op0); } template @@ -395,9 +395,17 @@ m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1) { } template -inline VPInstruction_match -m_ExtractLastLanePerPart(const Op0_t &Op0) { - return m_VPInstruction(Op0); +inline VPInstruction_match +m_ExtractLastPart(const Op0_t &Op0) { + return m_VPInstruction(Op0); +} + +template +inline VPInstruction_match< + VPInstruction::ExtractLastLane, + VPInstruction_match> +m_ExtractLastElement(const Op0_t &Op0) { + return m_ExtractLastLane(m_ExtractLastPart(Op0)); } template diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 931a5b7582c4e..68cb6ff507acb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -520,8 +520,8 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) { case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::ExplicitVectorLength: - case VPInstruction::ExtractLastElement: - case VPInstruction::ExtractLastLanePerPart: + case VPInstruction::ExtractLastLane: + case VPInstruction::ExtractLastPart: case VPInstruction::ExtractPenultimateElement: case VPInstruction::FirstActiveLane: case VPInstruction::Not: @@ -890,8 +890,7 @@ Value *VPInstruction::generate(VPTransformState &State) { return ReducedPartRdx; } - case VPInstruction::ExtractLastLanePerPart: - case VPInstruction::ExtractLastElement: + case VPInstruction::ExtractLastLane: case VPInstruction::ExtractPenultimateElement: { unsigned Offset = getOpcode() == VPInstruction::ExtractPenultimateElement ? 
2 : 1; @@ -1159,7 +1158,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, I32Ty, {Arg0Ty, I32Ty, I1Ty}); return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind); } - case VPInstruction::ExtractLastElement: { + case VPInstruction::ExtractLastLane: { // Add on the cost of extracting the element. auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF); return Ctx.TTI.getIndexedVectorInstrCostFromEnd(Instruction::ExtractElement, @@ -1179,8 +1178,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, } bool VPInstruction::isVectorToScalar() const { - return getOpcode() == VPInstruction::ExtractLastElement || - getOpcode() == VPInstruction::ExtractLastLanePerPart || + return getOpcode() == VPInstruction::ExtractLastLane || getOpcode() == VPInstruction::ExtractPenultimateElement || getOpcode() == Instruction::ExtractElement || getOpcode() == VPInstruction::ExtractLane || @@ -1243,8 +1241,8 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::ExtractLane: - case VPInstruction::ExtractLastElement: - case VPInstruction::ExtractLastLanePerPart: + case VPInstruction::ExtractLastLane: + case VPInstruction::ExtractLastPart: case VPInstruction::ExtractPenultimateElement: case VPInstruction::ActiveLaneMask: case VPInstruction::FirstActiveLane: @@ -1391,11 +1389,11 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::ExtractLane: O << "extract-lane"; break; - case VPInstruction::ExtractLastElement: - O << "extract-last-element"; + case VPInstruction::ExtractLastLane: + O << "extract-last-lane"; break; - case VPInstruction::ExtractLastLanePerPart: - O << "extract-last-lane-per-part"; + case VPInstruction::ExtractLastPart: + O << "extract-last-part"; break; case VPInstruction::ExtractPenultimateElement: O << "extract-penultimate-element"; @@ -1558,7 +1556,8 @@ void 
VPIRInstruction::extractLastLaneOfFirstOperand(VPBuilder &Builder) { if (Exiting->isLiveIn()) return; - Exiting = Builder.createNaryOp(VPInstruction::ExtractLastElement, {Exiting}); + Exiting = Builder.createNaryOp(VPInstruction::ExtractLastPart, Exiting); + Exiting = Builder.createNaryOp(VPInstruction::ExtractLastLane, Exiting); setOperand(0, Exiting); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 84817d78a077a..08b558d53ce5b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1238,9 +1238,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return; } - // Look through ExtractLastElement (BuildVector ....). - if (match(&R, m_CombineOr(m_ExtractLastElement(m_BuildVector()), - m_ExtractLastLanePerPart(m_BuildVector())))) { + // Look through ExtractLastLane (BuildVector ....). + if (match(&R, m_ExtractLastLane(m_BuildVector()))) { auto *BuildVector = cast(R.getOperand(0)); Def->replaceAllUsesWith( BuildVector->getOperand(BuildVector->getNumOperands() - 1)); @@ -1313,15 +1312,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return; } - if (match(Def, - m_CombineOr(m_ExtractLastElement(m_Broadcast(m_VPValue(A))), - m_ExtractLastLanePerPart(m_Broadcast(m_VPValue(A)))))) { + if (match(Def, m_ExtractLastLane(m_Broadcast(m_VPValue(A))))) { Def->replaceAllUsesWith(A); return; } - if (match(Def, m_CombineOr(m_ExtractLastElement(m_VPValue(A)), - m_ExtractLastLanePerPart(m_VPValue(A)))) && + if (match(Def, m_ExtractLastLane(m_VPValue(A))) && ((isa(A) && vputils::isSingleScalar(A)) || (isa(A) && cast(A)->isSingleScalar())) && @@ -1330,11 +1326,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return Def->replaceAllUsesWith(A); } - if (Plan->getUF() == 1 && - match(Def, m_ExtractLastLanePerPart(m_VPValue(A)))) { - return Def->replaceAllUsesWith( - 
Builder.createNaryOp(VPInstruction::ExtractLastElement, {A})); - } + if (Plan->getUF() == 1 && match(Def, m_ExtractLastPart(m_VPValue(A)))) + return Def->replaceAllUsesWith(A); } void VPlanTransforms::simplifyRecipes(VPlan &Plan) { @@ -1372,13 +1365,14 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(), true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/); Clone->insertBefore(RepOrWidenR); - unsigned ExtractOpc = - vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1)) - ? VPInstruction::ExtractLastElement - : VPInstruction::ExtractLastLanePerPart; - auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)}); - Ext->insertBefore(Clone); - Clone->setOperand(0, Ext); + VPBuilder Builder(Clone); + VPValue *ExtractOp = Clone->getOperand(0); + if (vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1))) + ExtractOp = + Builder.createNaryOp(VPInstruction::ExtractLastPart, ExtractOp); + ExtractOp = + Builder.createNaryOp(VPInstruction::ExtractLastLane, ExtractOp); + Clone->setOperand(0, ExtractOp); RepR->eraseFromParent(); continue; } @@ -1389,9 +1383,7 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { if (!vputils::isSingleScalar(RepOrWidenR) || !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) { return U->usesScalars(RepOrWidenR) || - match(cast(U), - m_CombineOr(m_ExtractLastElement(m_VPValue()), - m_ExtractLastLanePerPart(m_VPValue()))); + match(cast(U), m_ExtractLastPart(m_VPValue())); })) continue; @@ -4412,10 +4404,13 @@ void VPlanTransforms::addScalarResumePhis( auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue(); assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() && "Cannot handle loops with uncountable early exits"); - if (IsFOR) - ResumeFromVectorLoop = MiddleBuilder.createNaryOp( - VPInstruction::ExtractLastElement, {ResumeFromVectorLoop}, {}, - "vector.recur.extract"); + if (IsFOR) { + auto *ExtractPart = MiddleBuilder.createNaryOp( + 
VPInstruction::ExtractLastPart, ResumeFromVectorLoop); + ResumeFromVectorLoop = + MiddleBuilder.createNaryOp(VPInstruction::ExtractLastLane, + ExtractPart, {}, "vector.recur.extract"); + } StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx"; auto *ResumePhiR = ScalarPHBuilder.createScalarPhi( {ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name); @@ -4513,10 +4508,11 @@ void VPlanTransforms::addExitUsersForFirstOrderRecurrences(VPlan &Plan, // Now update VPIRInstructions modeling LCSSA phis in the exit block. // Extract the penultimate value of the recurrence and use it as operand for // the VPIRInstruction modeling the phi. - for (VPUser *U : FOR->users()) { - using namespace llvm::VPlanPatternMatch; - if (!match(U, m_ExtractLastElement(m_Specific(FOR)))) + for (VPRecipeBase &R : make_early_inc_range( + make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) { + if (!match(&R, m_ExtractLastElement(m_Specific(FOR)))) continue; + // For VF vscale x 1, if vscale = 1, we are unable to extract the // penultimate value of the recurrence. 
Instead we rely on the existing // extract of the last element from the result of @@ -4526,9 +4522,11 @@ void VPlanTransforms::addExitUsersForFirstOrderRecurrences(VPlan &Plan, Range)) return; VPValue *PenultimateElement = MiddleBuilder.createNaryOp( - VPInstruction::ExtractPenultimateElement, {FOR->getBackedgeValue()}, + VPInstruction::ExtractPenultimateElement, + MiddleBuilder.createNaryOp(VPInstruction::ExtractLastPart, + FOR->getBackedgeValue()), {}, "vector.recur.extract.for.phi"); - cast(U)->replaceAllUsesWith(PenultimateElement); + cast(&R)->replaceAllUsesWith(PenultimateElement); } } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index cfd1a741ee841..efbd02f7f4ecd 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -372,22 +372,27 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) { R.addOperand(getValueForPart(Op1, Part)); continue; } - if (match(&R, m_ExtractLastElement(m_VPValue(Op0))) || - match(&R, m_VPInstruction( - m_VPValue(Op0)))) { - addUniformForAllParts(cast(&R)); + + // Handle extraction from the last part. For scalar VF, directly replace + // with the appropriate scalar part. Otherwise, update operand to use the + // part. + if (match(&R, m_VPInstruction( + m_ExtractLastPart(m_VPValue(Op0)))) || + match(&R, m_ExtractLastElement(m_VPValue(Op0)))) { + auto *I = cast(&R); + bool IsPenultimate = + I->getOpcode() == VPInstruction::ExtractPenultimateElement; + unsigned PartIdx = IsPenultimate ? UF - 2 : UF - 1; + if (Plan.hasScalarVFOnly()) { - auto *I = cast(&R); - // Extracting from end with VF = 1 implies retrieving the last or - // penultimate scalar part (UF-1 or UF-2). - unsigned Offset = - I->getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2; - I->replaceAllUsesWith(getValueForPart(Op0, UF - Offset)); - R.eraseFromParent(); - } else { - // Otherwise we extract from the last part. 
- remapOperands(&R, UF - 1); + // For scalar VF, directly use the scalar part value. + addUniformForAllParts(I); + I->replaceAllUsesWith(getValueForPart(Op0, PartIdx)); + continue; } + // For vector VF, extract from the last part. + addUniformForAllParts(I); + R.setOperand(0, getValueForPart(Op0, UF - 1)); continue; } @@ -491,12 +496,10 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, continue; } if (Lane.getKind() == VPLane::Kind::ScalableLast) { - // Look through mandatory Unpack. - [[maybe_unused]] bool Matched = - match(Op, m_VPInstruction(m_VPValue(Op))); - assert(Matched && "original op must have been Unpack"); + auto *ExtractPart = + Builder.createNaryOp(VPInstruction::ExtractLastPart, {Op}); NewOps.push_back( - Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op})); + Builder.createNaryOp(VPInstruction::ExtractLastLane, {ExtractPart})); continue; } if (vputils::isSingleScalar(Op)) { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll index 49f663f5703b6..36e1fcf8b9bbd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll @@ -39,7 +39,9 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<[[ACC]]>, vp<[[REDUCE]]> +; CHECK-NEXT: EMIT vp<[[RED_RESULT_PART:%.+]]> = compute-reduction-result ir<[[ACC]]>, vp<[[REDUCE]]> +; CHECK-NEXT: EMIT vp<[[RED_RESULT_PART2:%.+]]> = extract-last-part vp<[[RED_RESULT_PART]]> +; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = extract-last-lane vp<[[RED_RESULT_PART2]]> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VEC_TC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -50,7 +52,7 @@ define i32 @print_partial_reduction(ptr %a, 
ptr %b) { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VEC_TC]]>, middle.block ], [ ir<0>, ir-bb ] -; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<[[RED_RESULT]]>, middle.block ], [ ir<0>, ir-bb ] +; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<[[RED_RESULT_PART]]>, middle.block ], [ ir<0>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index dff4971ffdfa1..7932adbe158b8 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -58,7 +58,9 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: Successor(s): middle.block ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: middle.block: -; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, vp<[[RDX_SELECT]]> +; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX_PART:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, vp<[[RDX_SELECT]]> +; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX_PART2:%.+]]> = extract-last-part vp<[[RDX_PART]]> +; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = extract-last-lane vp<[[RDX_PART2]]> ; IF-EVL-OUTLOOP-NEXT: Successor(s): ir-bb ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: ir-bb: @@ -97,7 +99,9 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: Successor(s): middle.block ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: middle.block: -; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]> +; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX_PART:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]> +; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX_PART2:%.+]]> = extract-last-part vp<[[RDX_PART]]> +; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = 
extract-last-lane vp<[[RDX_PART2]]> ; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: ir-bb: @@ -131,7 +135,9 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: Successor(s): middle.block ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: middle.block: -; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]> +; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX_PART:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]> +; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX_PART2:%.+]]> = extract-last-part vp<[[RDX_PART]]> +; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = extract-last-lane vp<[[RDX_PART2]]> ; NO-VP-OUTLOOP-NEXT: EMIT vp<[[BOC:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]> ; NO-VP-OUTLOOP-NEXT: EMIT branch-on-cond vp<[[BOC]]> ; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb, scalar.ph @@ -142,7 +148,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: scalar.ph: ; NO-VP-OUTLOOP-NEXT: EMIT-SCALAR vp<[[IV_RESUME:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] -; NO-VP-OUTLOOP-NEXT: EMIT-SCALAR vp<[[RED_RESUME:%.+]]> = phi [ vp<[[RDX]]>, middle.block ], [ ir<%start>, ir-bb ] +; NO-VP-OUTLOOP-NEXT: EMIT-SCALAR vp<[[RED_RESUME:%.+]]> = phi [ vp<[[RDX_PART]]>, middle.block ], [ ir<%start>, ir-bb ] ; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: ir-bb: @@ -179,7 +185,9 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: Successor(s): middle.block ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: middle.block: -; NO-VP-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]> +; NO-VP-INLOOP-NEXT: EMIT vp<[[RDX_PART:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]> +; NO-VP-INLOOP-NEXT: EMIT vp<[[RDX_PART2:%.+]]> = extract-last-part vp<[[RDX_PART]]> +; NO-VP-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = extract-last-lane vp<[[RDX_PART2]]> ; 
NO-VP-INLOOP-NEXT: EMIT vp<[[BOC:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]> ; NO-VP-INLOOP-NEXT: EMIT branch-on-cond vp<[[BOC]]> ; NO-VP-INLOOP-NEXT: Successor(s): ir-bb, scalar.ph @@ -190,7 +198,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: scalar.ph: ; NO-VP-INLOOP-NEXT: EMIT-SCALAR vp<[[IV_RESUME:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] -; NO-VP-INLOOP-NEXT: EMIT-SCALAR vp<[[RED_RESUME:%.+]]> = phi [ vp<[[RDX]]>, middle.block ], [ ir<%start>, ir-bb ] +; NO-VP-INLOOP-NEXT: EMIT-SCALAR vp<[[RED_RESUME:%.+]]> = phi [ vp<[[RDX_PART]]>, middle.block ], [ ir<%start>, ir-bb ] ; NO-VP-INLOOP-NEXT: Successor(s): ir-bb ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: ir-bb: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index 1376a687b38b8..e82226e3c7d3a 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -37,8 +37,10 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-element ir<%for.1.next> -; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = extract-last-element vp<[[FOR1_SPLICE]]> +; CHECK-NEXT: EMIT vp<[[RESUME_1_PART:%.+]]> = extract-last-part ir<%for.1.next> +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-lane vp<[[RESUME_1_PART]]> +; CHECK-NEXT: EMIT vp<[[RESUME_2_PART:%.+]]> = extract-last-part vp<[[FOR1_SPLICE]]> +; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = extract-last-lane vp<[[RESUME_2_PART]]> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -117,9 +119,12 @@ define void 
@test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-element ir<%for.1.next> -; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = extract-last-element vp<[[FOR1_SPLICE]]> -; CHECK-NEXT: EMIT vp<[[RESUME_3:%.+]]>.2 = extract-last-element vp<[[FOR2_SPLICE]]> +; CHECK-NEXT: EMIT vp<[[RESUME_1_PART:%.+]]> = extract-last-part ir<%for.1.next> +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-lane vp<[[RESUME_1_PART]]> +; CHECK-NEXT: EMIT vp<[[RESUME_2_PART:%.+]]> = extract-last-part vp<[[FOR1_SPLICE]]> +; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = extract-last-lane vp<[[RESUME_2_PART]]> +; CHECK-NEXT: EMIT vp<[[RESUME_3_PART:%.+]]> = extract-last-part vp<[[FOR2_SPLICE]]> +; CHECK-NEXT: EMIT vp<[[RESUME_3:%.+]]>.2 = extract-last-lane vp<[[RESUME_3_PART]]> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -203,8 +208,10 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-last-element ir<%for.x.next> -; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-last-element ir<%for.x.prev> +; CHECK-NEXT: EMIT vp<[[EXT_X_PART:%.+]]> = extract-last-part ir<%for.x.next> +; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-last-lane vp<[[EXT_X_PART]]> +; CHECK-NEXT: EMIT vp<[[EXT_Y_PART:%.+]]> = extract-last-part ir<%for.x.prev> +; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-last-lane vp<[[EXT_Y_PART]]> ; CHECK-NEXT: EMIT vp<[[MIDDLE_C:%.+]]> = icmp eq ir<4098>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_C]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -282,8 +289,10 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) { ; CHECK-NEXT: Successor(s): 
middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-last-element ir<%for.x.next> -; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-last-element ir<%for.x.prev> +; CHECK-NEXT: EMIT vp<[[EXT_X_PART:%.+]]> = extract-last-part ir<%for.x.next> +; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-last-lane vp<[[EXT_X_PART]]> +; CHECK-NEXT: EMIT vp<[[EXT_Y_PART:%.+]]> = extract-last-part ir<%for.x.prev> +; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-last-lane vp<[[EXT_Y_PART]]> ; CHECK-NEXT: EMIT vp<[[MIDDLE_C:%.+]]> = icmp eq ir<4098>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_C]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 9deab9063d710..5a223b5b6c726 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -212,7 +212,9 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result ir<%and.red>, vp<[[SEL]]> +; CHECK-NEXT: EMIT vp<[[RED_RES_PART:%.+]]> = compute-reduction-result ir<%and.red>, vp<[[SEL]]> +; CHECK-NEXT: EMIT vp<[[RED_RES_PART2:%.+]]> = extract-last-part vp<[[RED_RES_PART]]> +; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = extract-last-lane vp<[[RED_RES_PART2]]> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll index 198a30af814ba..33c91ca875ed7 100644 --- 
a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll @@ -137,18 +137,18 @@ define i16 @for_phi_removed(ptr %src) { ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i16 1, i16 0 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 104 ; UNROLL-NO-IC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; UNROLL-NO-IC: [[MIDDLE_BLOCK]]: -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 ; UNROLL-NO-IC-NEXT: br label %[[SCALAR_PH:.*]] ; UNROLL-NO-IC: [[SCALAR_PH]]: ; UNROLL-NO-IC-NEXT: br label %[[LOOP:.*]] ; UNROLL-NO-IC: [[LOOP]]: ; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ 104, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[P:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[P:%.*]] = phi i16 [ [[TMP4]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] ; UNROLL-NO-IC-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4 ; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 ; UNROLL-NO-IC-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0 @@ -202,18 +202,18 @@ define i16 @for_phi_removed(ptr %src) { ; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], 
i64 0 ; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer -; SINK-AFTER-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer +; SINK-AFTER-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; SINK-AFTER-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i16 1, i16 0 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 108 ; SINK-AFTER-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SINK-AFTER: [[MIDDLE_BLOCK]]: -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 ; SINK-AFTER-NEXT: br label %[[SCALAR_PH:.*]] ; SINK-AFTER: [[SCALAR_PH]]: ; SINK-AFTER-NEXT: br label %[[LOOP:.*]] ; SINK-AFTER: [[LOOP]]: ; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ 108, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; SINK-AFTER-NEXT: [[P:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; SINK-AFTER-NEXT: [[P:%.*]] = phi i16 [ [[TMP4]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] ; SINK-AFTER-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4 ; SINK-AFTER-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 ; SINK-AFTER-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0 diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index 64caecc847096..bbd596a772c53 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -219,7 +219,8 @@ exit: ; DBG-NEXT: Successor(s): middle.block ; DBG-EMPTY: ; DBG-NEXT: middle.block: -; DBG-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-element 
vp<[[SCALAR_STEPS]]> +; DBG-NEXT: EMIT vp<[[RESUME_1_PART:%.+]]> = extract-last-part vp<[[SCALAR_STEPS]]> +; DBG-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-lane vp<[[RESUME_1_PART]]> ; DBG-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, vp<[[VEC_TC]]> ; DBG-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; DBG-NEXT: Successor(s): ir-bb, scalar.ph diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll index 06b044872c217..8f039c062c531 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll @@ -35,7 +35,9 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result fast ir<%red>, ir<%red.next> +; CHECK-NEXT: EMIT vp<[[RED_RES_PART:%.+]]> = compute-reduction-result fast ir<%red>, ir<%red.next> +; CHECK-NEXT: EMIT vp<[[RED_RES_PART2:%.+]]> = extract-last-part vp<[[RED_RES_PART]]> +; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = extract-last-lane vp<[[RED_RES_PART2]]> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -46,7 +48,7 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT-SCALAR vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] -; CHECK-NEXT: EMIT-SCALAR vp<[[RED_RESUME:%.+]]> = phi [ vp<[[RED_RES]]>, middle.block ], [ ir<0.000000e+00>, ir-bb ] +; CHECK-NEXT: EMIT-SCALAR vp<[[RED_RESUME:%.+]]> = phi [ vp<[[RED_RES_PART]]>, middle.block ], [ ir<0.000000e+00>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -177,7 +179,9 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: Successor(s): middle.block 
; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result nnan ninf nsz ir<%sum.07>, ir<[[MULADD]]> +; CHECK-NEXT: EMIT vp<[[RED_RES_PART:%.+]]> = compute-reduction-result nnan ninf nsz ir<%sum.07>, ir<[[MULADD]]> +; CHECK-NEXT: EMIT vp<[[RED_RES_PART2:%.+]]> = extract-last-part vp<[[RED_RES_PART]]> +; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = extract-last-lane vp<[[RED_RES_PART2]]> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -188,7 +192,7 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT-SCALAR vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] -; CHECK-NEXT: EMIT-SCALAR vp<[[RED_RESUME:%.+]]> = phi [ vp<[[RED_RES]]>, middle.block ], [ ir<0.000000e+00>, ir-bb ] +; CHECK-NEXT: EMIT-SCALAR vp<[[RED_RESUME:%.+]]> = phi [ vp<[[RED_RES_PART]]>, middle.block ], [ ir<0.000000e+00>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -238,7 +242,9 @@ define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RDX_RES:%.+]]> = compute-find-iv-result ir<%rdx>, ir<%start>, ir<-9223372036854775808>, ir<%cond> +; CHECK-NEXT: EMIT vp<[[RDX_RES_PART:%.+]]> = compute-find-iv-result ir<%rdx>, ir<%start>, ir<-9223372036854775808>, ir<%cond> +; CHECK-NEXT: EMIT vp<[[RDX_RES_PART2:%.+]]> = extract-last-part vp<[[RDX_RES_PART]]> +; CHECK-NEXT: EMIT vp<[[RDX_RES:%.+]]> = extract-last-lane vp<[[RDX_RES_PART2]]> ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%n>, vp<{{.+}}> ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -249,7 +255,7 @@ define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: ; CHECK-NEXT: EMIT-SCALAR 
vp<%bc.resume.val> = phi [ vp<{{.+}}>, middle.block ], [ ir<0>, ir-bb ] -; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<[[RDX_RES]]>, middle.block ], [ ir<%start>, ir-bb ] +; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<[[RDX_RES_PART]]>, middle.block ], [ ir<%start>, ir-bb ] ; entry: br label %loop @@ -497,12 +503,14 @@ define i32 @print_mulacc_sub(ptr %a, ptr %b) { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<%10> = compute-reduction-result ir<%accum>, vp<%8> +; CHECK-NEXT: EMIT vp<%11> = extract-last-part vp<%10> +; CHECK-NEXT: EMIT vp<%12> = extract-last-lane vp<%11> ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<1024>, vp<%2> ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %loop ] (extra operand: vp<%10> from middle.block) +; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %loop ] (extra operand: vp<%12> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: @@ -615,12 +623,14 @@ define i32 @print_mulacc_negated(ptr %a, ptr %b) { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<%10> = compute-reduction-result ir<%accum>, vp<%8> +; CHECK-NEXT: EMIT vp<%11> = extract-last-part vp<%10> +; CHECK-NEXT: EMIT vp<%12> = extract-last-lane vp<%11> ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<1024>, vp<%2> ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %loop ] (extra operand: vp<%10> from middle.block) +; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %loop ] (extra operand: vp<%12> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 91e0037d12c61..727005666d868 100644 --- 
a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -536,7 +536,8 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[EXIT:%.+]]> = extract-last-element ir<%add> +; CHECK-NEXT: EMIT vp<[[EXIT_PART:%.+]]> = extract-last-part ir<%add> +; CHECK-NEXT: EMIT vp<[[EXIT:%.+]]> = extract-last-lane vp<[[EXIT_PART]]> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -903,7 +904,8 @@ define void @zext_nneg(ptr noalias %p, ptr noalias %p1) { ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx> ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN-CAST ir<%zext> = zext nneg ir<%l> -; CHECK-NEXT: EMIT vp<[[EXT:%.+]]> = extract-last-element ir<%zext> +; CHECK-NEXT: EMIT vp<[[EXT_PART:%.+]]> = extract-last-part ir<%zext> +; CHECK-NEXT: EMIT vp<[[EXT:%.+]]> = extract-last-lane vp<[[EXT_PART]]> ; CHECK-NEXT: CLONE store vp<[[EXT]]>, ir<%p1> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> @@ -960,8 +962,10 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-element ir<%for.1.next> -; CHECK-NEXT: EMIT vp<[[FOR_RESULT:%.+]]> = extract-penultimate-element ir<%for.1.next> +; CHECK-NEXT: EMIT vp<[[RESUME_1_PART:%.+]]> = extract-last-part ir<%for.1.next> +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-lane vp<[[RESUME_1_PART]]> +; CHECK-NEXT: EMIT vp<[[FOR_RESULT_PART:%.+]]> = extract-last-part ir<%for.1.next> +; CHECK-NEXT: EMIT vp<[[FOR_RESULT:%.+]]> = extract-penultimate-element vp<[[FOR_RESULT_PART]]> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = 
icmp eq ir<1000>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph From 47861d77490217aea43a2c38d052cefe81256d02 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 28 Oct 2025 04:13:26 +0000 Subject: [PATCH 2/3] !fixup address comments, thanks --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- llvm/lib/Transforms/Vectorize/VPlan.h | 6 +++--- llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp | 3 ++- .../Transforms/Vectorize/VPlanConstruction.cpp | 2 +- .../lib/Transforms/Vectorize/VPlanPatternMatch.h | 2 +- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +- .../lib/Transforms/Vectorize/VPlanTransforms.cpp | 6 +++--- llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 16 +++++++++------- 8 files changed, 21 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index facb0fabdf57e..886d71f66bfbb 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8821,7 +8821,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( if (FinalReductionResult == U || Parent->getParent()) continue; U->replaceUsesOfWith(OrigExitingVPV, FinalReductionResult); - if (match(U, m_ExtractLastElement(m_VPValue()))) + if (match(U, m_ExtractFinalLane(m_VPValue()))) cast(U)->replaceAllUsesWith(FinalReductionResult); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 619ea16e42076..149de2ecf0203 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1017,9 +1017,9 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, ComputeAnyOfResult, ComputeFindIVResult, ComputeReductionResult, - // Extracts the last part of its operand. + // Extracts the last part of its operand. Removed during unrolling. ExtractLastPart, - // Extracts the last lane of the current part of its operand. 
+ // Extracts the last lane of its vector operand, per part. ExtractLastLane, // Extracts the second-to-last lane from its operand or the second-to-last // part if it is scalar. In the latter case, the recipe will be removed @@ -1403,7 +1403,7 @@ class VPIRInstruction : public VPRecipeBase { /// Update the recipes first operand to the last lane of the operand using \p /// Builder. Must only be used for VPIRInstructions with at least one operand /// wrapping a PHINode. - void extractLastLaneOfFirstOperand(VPBuilder &Builder); + void extractFinalLaneOfFirstOperand(VPBuilder &Builder); }; /// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index b93920d285efb..29aee2ad955d2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -124,7 +124,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { return BaseTy; } case VPInstruction::ExtractLastPart: { - // ExtractLastPart returns the same type as its operand + // Element type of ExtractLastPart is equal to the element type of its + // operand.
return inferScalarType(R->getOperand(0)); } case VPInstruction::LogicalAnd: diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 65688a3f0b6be..eba3c97b51df0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -471,7 +471,7 @@ static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) { ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB && "exit values from early exits must be fixed when branch to " "early-exit is added"); - ExitIRI->extractLastLaneOfFirstOperand(B); + ExitIRI->extractFinalLaneOfFirstOperand(B); } } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index 2208ccca92c36..19ae60aacb3ad 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -404,7 +404,7 @@ template inline VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match> -m_ExtractLastElement(const Op0_t &Op0) { +m_ExtractFinalLane(const Op0_t &Op0) { return m_ExtractLastLane(m_ExtractLastPart(Op0)); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 68cb6ff507acb..6f5f8a7f5b5b0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1548,7 +1548,7 @@ InstructionCost VPIRInstruction::computeCost(ElementCount VF, return 0; } -void VPIRInstruction::extractLastLaneOfFirstOperand(VPBuilder &Builder) { +void VPIRInstruction::extractFinalLaneOfFirstOperand(VPBuilder &Builder) { assert(isa(getInstruction()) && "can only update exiting operands to phi nodes"); assert(getNumOperands() > 0 && "must have at least one operand"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 
08b558d53ce5b..f5b8a869d33b9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -839,7 +839,7 @@ static VPValue *optimizeLatchExitInductionUser( VPlan &Plan, VPTypeAnalysis &TypeInfo, VPBlockBase *PredVPBB, VPValue *Op, DenseMap &EndValues, ScalarEvolution &SE) { VPValue *Incoming; - if (!match(Op, m_ExtractLastElement(m_VPValue(Incoming)))) + if (!match(Op, m_ExtractFinalLane(m_VPValue(Incoming)))) return nullptr; auto *WideIV = getOptimizableIVOf(Incoming, SE); @@ -3478,7 +3478,7 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB, if (ExitIRI->getNumOperands() != 1) { // The first of two operands corresponds to the latch exit, via MiddleVPBB // predecessor. Extract its last lane. - ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder); + ExitIRI->extractFinalLaneOfFirstOperand(MiddleBuilder); } VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx); @@ -4510,7 +4510,7 @@ void VPlanTransforms::addExitUsersForFirstOrderRecurrences(VPlan &Plan, // the VPIRInstruction modeling the phi. for (VPRecipeBase &R : make_early_inc_range( make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) { - if (!match(&R, m_ExtractLastElement(m_Specific(FOR)))) + if (!match(&R, m_ExtractFinalLane(m_Specific(FOR)))) continue; // For VF vscale x 1, if vscale = 1, we are unable to extract the diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index efbd02f7f4ecd..73b84553358b9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -378,20 +378,18 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) { // part. 
if (match(&R, m_VPInstruction( m_ExtractLastPart(m_VPValue(Op0)))) || - match(&R, m_ExtractLastElement(m_VPValue(Op0)))) { + match(&R, m_ExtractFinalLane(m_VPValue(Op0)))) { auto *I = cast(&R); - bool IsPenultimate = - I->getOpcode() == VPInstruction::ExtractPenultimateElement; - unsigned PartIdx = IsPenultimate ? UF - 2 : UF - 1; - + addUniformForAllParts(I); if (Plan.hasScalarVFOnly()) { + bool IsPenultimate = + I->getOpcode() == VPInstruction::ExtractPenultimateElement; + unsigned PartIdx = IsPenultimate ? UF - 2 : UF - 1; // For scalar VF, directly use the scalar part value. - addUniformForAllParts(I); I->replaceAllUsesWith(getValueForPart(Op0, PartIdx)); continue; } // For vector VF, extract from the last part. - addUniformForAllParts(I); R.setOperand(0, getValueForPart(Op0, UF - 1)); continue; } @@ -496,6 +494,10 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, continue; } if (Lane.getKind() == VPLane::Kind::ScalableLast) { + // Look through mandatory Unpack. + [[maybe_unused]] bool Matched = + match(Op, m_VPInstruction(m_VPValue(Op))); + assert(Matched && "original op must have been Unpack"); auto *ExtractPart = Builder.createNaryOp(VPInstruction::ExtractLastPart, {Op}); NewOps.push_back( From f41528c64d55feacc0cd6d6cacfdc87e86209849 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 30 Oct 2025 04:21:11 +0000 Subject: [PATCH 3/3] !fixup address comments, thanks --- .../Transforms/Vectorize/LoopVectorize.cpp | 16 +++++++++--- llvm/lib/Transforms/Vectorize/VPlan.h | 6 ++--- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 1 + .../Transforms/Vectorize/VPlanTransforms.cpp | 12 ++++----- llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 8 +++--- .../LoopVectorize/AArch64/vplan-printing.ll | 6 ++--- .../RISCV/vplan-vp-intrinsics-reduction.ll | 20 +++++--------- ...-order-recurrence-sink-replicate-region.ll | 4 +-- ...first-order-recurrence-with-uniform-ops.ll | 12 ++++----- .../vplan-printing-reductions.ll | 26 ++++++------------- 
.../LoopVectorize/vplan-printing.ll | 3 +-- 11 files changed, 51 insertions(+), 63 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index ae311ed7f05af..936dcf9f7e752 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8816,14 +8816,24 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( } // Update all users outside the vector region. Also replace redundant - // ExtractLastElement. + // extracts. for (auto *U : to_vector(OrigExitingVPV->users())) { auto *Parent = cast(U)->getParent(); if (FinalReductionResult == U || Parent->getParent()) continue; U->replaceUsesOfWith(OrigExitingVPV, FinalReductionResult); - if (match(U, m_ExtractFinalLane(m_VPValue()))) - cast(U)->replaceAllUsesWith(FinalReductionResult); + + // Check for redundant ExtractLastPart followed by ExtractLastLane. + if (!match(U, m_ExtractLastPart(m_VPValue()))) + continue; + + auto *ExtractPart = cast(U); + if (ExtractPart->getNumUsers() != 1) + continue; + + VPUser *User = *ExtractPart->user_begin(); + if (match(User, m_ExtractLastLane(m_VPValue()))) + cast(User)->replaceAllUsesWith(FinalReductionResult); } // Adjust AnyOf reductions; replace the reduction phi for the selected value diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index cde4ae8cc489c..45d81278b836d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1400,9 +1400,9 @@ class VPIRInstruction : public VPRecipeBase { return true; } - /// Update the recipes first operand to the last lane of the operand using \p - /// Builder. Must only be used for VPIRInstructions with at least one operand - /// wrapping a PHINode. + /// Update the recipe's first operand to the final lane of the operand using + /// \p Builder. Must only be used for VPIRInstructions with at least one + /// operand wrapping a PHINode. 
void extractFinalLaneOfFirstOperand(VPBuilder &Builder); }; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 7cefaa8f4fcb3..aaa4cea9e0adc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -905,6 +905,7 @@ Value *VPInstruction::generate(VPTransformState &State) { // Extract lane VF - Offset from the operand. Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset)); } else { + // TODO: Remove ExtractLastLane for scalar VFs. assert(Offset <= 1 && "invalid offset to extract from"); Res = State.get(getOperand(0)); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 6f68b042e5640..6e7c6dcdf1ef7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1402,7 +1402,9 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { if (!vputils::isSingleScalar(RepOrWidenR) || !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) { return U->usesScalars(RepOrWidenR) || - match(cast(U), m_ExtractLastPart(m_VPValue())); + match(cast(U), + m_CombineOr(m_ExtractLastPart(m_VPValue()), + m_ExtractLastLane(m_VPValue()))); })) continue; @@ -3473,7 +3475,7 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB, unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1; if (ExitIRI->getNumOperands() != 1) { // The first of two operands corresponds to the latch exit, via MiddleVPBB - // predecessor. Extract its last lane. + // predecessor. Extract its final lane. 
ExitIRI->extractFinalLaneOfFirstOperand(MiddleBuilder); } @@ -4561,10 +4563,8 @@ void VPlanTransforms::addExitUsersForFirstOrderRecurrences(VPlan &Plan, Range)) return; VPValue *PenultimateElement = MiddleBuilder.createNaryOp( - VPInstruction::ExtractPenultimateElement, - MiddleBuilder.createNaryOp(VPInstruction::ExtractLastPart, - FOR->getBackedgeValue()), - {}, "vector.recur.extract.for.phi"); + VPInstruction::ExtractPenultimateElement, FOR->getBackedgeValue(), {}, + "vector.recur.extract.for.phi"); cast(&R)->replaceAllUsesWith(PenultimateElement); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 73b84553358b9..7c63b788e99d1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -377,19 +377,19 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) { // with the appropriate scalar part. Otherwise, update operand to use the // part. if (match(&R, m_VPInstruction( - m_ExtractLastPart(m_VPValue(Op0)))) || + m_VPValue(Op0))) || match(&R, m_ExtractFinalLane(m_VPValue(Op0)))) { auto *I = cast(&R); addUniformForAllParts(I); if (Plan.hasScalarVFOnly()) { - bool IsPenultimate = + bool IsPenultimatePart = I->getOpcode() == VPInstruction::ExtractPenultimateElement; - unsigned PartIdx = IsPenultimate ? UF - 2 : UF - 1; + unsigned PartIdx = IsPenultimatePart ? UF - 2 : UF - 1; // For scalar VF, directly use the scalar part value. I->replaceAllUsesWith(getValueForPart(Op0, PartIdx)); continue; } - // For vector VF, extract from the last part. + // For vector VF, always extract from the last part. 
R.setOperand(0, getValueForPart(Op0, UF - 1)); continue; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll index 36e1fcf8b9bbd..49f663f5703b6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll @@ -39,9 +39,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RED_RESULT_PART:%.+]]> = compute-reduction-result ir<[[ACC]]>, vp<[[REDUCE]]> -; CHECK-NEXT: EMIT vp<[[RED_RESULT_PART2:%.+]]> = extract-last-part vp<[[RED_RESULT_PART]]> -; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = extract-last-lane vp<[[RED_RESULT_PART2]]> +; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<[[ACC]]>, vp<[[REDUCE]]> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VEC_TC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -52,7 +50,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VEC_TC]]>, middle.block ], [ ir<0>, ir-bb ] -; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<[[RED_RESULT_PART]]>, middle.block ], [ ir<0>, ir-bb ] +; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<[[RED_RESULT]]>, middle.block ], [ ir<0>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index 7932adbe158b8..dff4971ffdfa1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -58,9 +58,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 
%start) { ; IF-EVL-OUTLOOP-NEXT: Successor(s): middle.block ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: middle.block: -; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX_PART:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, vp<[[RDX_SELECT]]> -; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX_PART2:%.+]]> = extract-last-part vp<[[RDX_PART]]> -; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = extract-last-lane vp<[[RDX_PART2]]> +; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, vp<[[RDX_SELECT]]> ; IF-EVL-OUTLOOP-NEXT: Successor(s): ir-bb ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: ir-bb: @@ -99,9 +97,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: Successor(s): middle.block ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: middle.block: -; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX_PART:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]> -; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX_PART2:%.+]]> = extract-last-part vp<[[RDX_PART]]> -; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = extract-last-lane vp<[[RDX_PART2]]> +; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]> ; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: ir-bb: @@ -135,9 +131,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: Successor(s): middle.block ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: middle.block: -; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX_PART:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]> -; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX_PART2:%.+]]> = extract-last-part vp<[[RDX_PART]]> -; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = extract-last-lane vp<[[RDX_PART2]]> +; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]> ; NO-VP-OUTLOOP-NEXT: EMIT vp<[[BOC:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]> ; NO-VP-OUTLOOP-NEXT: EMIT branch-on-cond vp<[[BOC]]> ; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb, scalar.ph @@ -148,7 
+142,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: scalar.ph: ; NO-VP-OUTLOOP-NEXT: EMIT-SCALAR vp<[[IV_RESUME:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] -; NO-VP-OUTLOOP-NEXT: EMIT-SCALAR vp<[[RED_RESUME:%.+]]> = phi [ vp<[[RDX_PART]]>, middle.block ], [ ir<%start>, ir-bb ] +; NO-VP-OUTLOOP-NEXT: EMIT-SCALAR vp<[[RED_RESUME:%.+]]> = phi [ vp<[[RDX]]>, middle.block ], [ ir<%start>, ir-bb ] ; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: ir-bb: @@ -185,9 +179,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: Successor(s): middle.block ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: middle.block: -; NO-VP-INLOOP-NEXT: EMIT vp<[[RDX_PART:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]> -; NO-VP-INLOOP-NEXT: EMIT vp<[[RDX_PART2:%.+]]> = extract-last-part vp<[[RDX_PART]]> -; NO-VP-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = extract-last-lane vp<[[RDX_PART2]]> +; NO-VP-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]> ; NO-VP-INLOOP-NEXT: EMIT vp<[[BOC:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]> ; NO-VP-INLOOP-NEXT: EMIT branch-on-cond vp<[[BOC]]> ; NO-VP-INLOOP-NEXT: Successor(s): ir-bb, scalar.ph @@ -198,7 +190,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: scalar.ph: ; NO-VP-INLOOP-NEXT: EMIT-SCALAR vp<[[IV_RESUME:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] -; NO-VP-INLOOP-NEXT: EMIT-SCALAR vp<[[RED_RESUME:%.+]]> = phi [ vp<[[RDX_PART]]>, middle.block ], [ ir<%start>, ir-bb ] +; NO-VP-INLOOP-NEXT: EMIT-SCALAR vp<[[RED_RESUME:%.+]]> = phi [ vp<[[RDX]]>, middle.block ], [ ir<%start>, ir-bb ] ; NO-VP-INLOOP-NEXT: Successor(s): ir-bb ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: ir-bb: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll 
b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 5a223b5b6c726..9deab9063d710 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -212,9 +212,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RED_RES_PART:%.+]]> = compute-reduction-result ir<%and.red>, vp<[[SEL]]> -; CHECK-NEXT: EMIT vp<[[RED_RES_PART2:%.+]]> = extract-last-part vp<[[RED_RES_PART]]> -; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = extract-last-lane vp<[[RED_RES_PART2]]> +; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result ir<%and.red>, vp<[[SEL]]> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll index 33c91ca875ed7..198a30af814ba 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll @@ -137,18 +137,18 @@ define i16 @for_phi_removed(ptr %src) { ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i16 1, i16 0 +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = 
add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 104 ; UNROLL-NO-IC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; UNROLL-NO-IC: [[MIDDLE_BLOCK]]: +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 ; UNROLL-NO-IC-NEXT: br label %[[SCALAR_PH:.*]] ; UNROLL-NO-IC: [[SCALAR_PH]]: ; UNROLL-NO-IC-NEXT: br label %[[LOOP:.*]] ; UNROLL-NO-IC: [[LOOP]]: ; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ 104, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[P:%.*]] = phi i16 [ [[TMP4]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[P:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] ; UNROLL-NO-IC-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4 ; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 ; UNROLL-NO-IC-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0 @@ -202,18 +202,18 @@ define i16 @for_phi_removed(ptr %src) { ; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer -; SINK-AFTER-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; SINK-AFTER-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i16 1, i16 0 +; SINK-AFTER-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 108 ; SINK-AFTER-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SINK-AFTER: [[MIDDLE_BLOCK]]: +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 ; 
SINK-AFTER-NEXT: br label %[[SCALAR_PH:.*]] ; SINK-AFTER: [[SCALAR_PH]]: ; SINK-AFTER-NEXT: br label %[[LOOP:.*]] ; SINK-AFTER: [[LOOP]]: ; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ 108, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; SINK-AFTER-NEXT: [[P:%.*]] = phi i16 [ [[TMP4]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; SINK-AFTER-NEXT: [[P:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] ; SINK-AFTER-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4 ; SINK-AFTER-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 ; SINK-AFTER-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0 diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll index c875fee3e8d24..291ada86cf797 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll @@ -35,9 +35,7 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RED_RES_PART:%.+]]> = compute-reduction-result fast ir<%red>, ir<%red.next> -; CHECK-NEXT: EMIT vp<[[RED_RES_PART2:%.+]]> = extract-last-part vp<[[RED_RES_PART]]> -; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = extract-last-lane vp<[[RED_RES_PART2]]> +; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result fast ir<%red>, ir<%red.next> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -48,7 +46,7 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT-SCALAR vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] -; CHECK-NEXT: EMIT-SCALAR vp<[[RED_RESUME:%.+]]> = phi [ vp<[[RED_RES_PART]]>, middle.block ], [ ir<0.000000e+00>, ir-bb ] +; CHECK-NEXT: EMIT-SCALAR 
vp<[[RED_RESUME:%.+]]> = phi [ vp<[[RED_RES]]>, middle.block ], [ ir<0.000000e+00>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -179,9 +177,7 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RED_RES_PART:%.+]]> = compute-reduction-result nnan ninf nsz ir<%sum.07>, ir<[[MULADD]]> -; CHECK-NEXT: EMIT vp<[[RED_RES_PART2:%.+]]> = extract-last-part vp<[[RED_RES_PART]]> -; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = extract-last-lane vp<[[RED_RES_PART2]]> +; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result nnan ninf nsz ir<%sum.07>, ir<[[MULADD]]> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -192,7 +188,7 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT-SCALAR vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb ] -; CHECK-NEXT: EMIT-SCALAR vp<[[RED_RESUME:%.+]]> = phi [ vp<[[RED_RES_PART]]>, middle.block ], [ ir<0.000000e+00>, ir-bb ] +; CHECK-NEXT: EMIT-SCALAR vp<[[RED_RESUME:%.+]]> = phi [ vp<[[RED_RES]]>, middle.block ], [ ir<0.000000e+00>, ir-bb ] ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -242,9 +238,7 @@ define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RDX_RES_PART:%.+]]> = compute-find-iv-result ir<%rdx>, ir<%start>, ir<-9223372036854775808>, ir<%cond> -; CHECK-NEXT: EMIT vp<[[RDX_RES_PART2:%.+]]> = extract-last-part vp<[[RDX_RES_PART]]> -; CHECK-NEXT: EMIT vp<[[RDX_RES:%.+]]> = extract-last-lane vp<[[RDX_RES_PART2]]> +; CHECK-NEXT: EMIT vp<[[RDX_RES:%.+]]> = compute-find-iv-result ir<%rdx>, ir<%start>, ir<-9223372036854775808>, ir<%cond> ; 
CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%n>, vp<{{.+}}> ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -255,7 +249,7 @@ define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: ; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<{{.+}}>, middle.block ], [ ir<0>, ir-bb ] -; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<[[RDX_RES_PART]]>, middle.block ], [ ir<%start>, ir-bb ] +; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<[[RDX_RES]]>, middle.block ], [ ir<%start>, ir-bb ] ; entry: br label %loop @@ -503,14 +497,12 @@ define i32 @print_mulacc_sub(ptr %a, ptr %b) { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<%10> = compute-reduction-result ir<%accum>, vp<%8> -; CHECK-NEXT: EMIT vp<%11> = extract-last-part vp<%10> -; CHECK-NEXT: EMIT vp<%12> = extract-last-lane vp<%11> ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<1024>, vp<%2> ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %loop ] (extra operand: vp<%12> from middle.block) +; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %loop ] (extra operand: vp<%10> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: @@ -623,14 +615,12 @@ define i32 @print_mulacc_negated(ptr %a, ptr %b) { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<%10> = compute-reduction-result ir<%accum>, vp<%8> -; CHECK-NEXT: EMIT vp<%11> = extract-last-part vp<%10> -; CHECK-NEXT: EMIT vp<%12> = extract-last-lane vp<%11> ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<1024>, vp<%2> ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %loop ] (extra operand: vp<%12> from middle.block) +; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %loop ] (extra 
operand: vp<%10> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 727005666d868..002b88044de5e 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -964,8 +964,7 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[RESUME_1_PART:%.+]]> = extract-last-part ir<%for.1.next> ; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-lane vp<[[RESUME_1_PART]]> -; CHECK-NEXT: EMIT vp<[[FOR_RESULT_PART:%.+]]> = extract-last-part ir<%for.1.next> -; CHECK-NEXT: EMIT vp<[[FOR_RESULT:%.+]]> = extract-penultimate-element vp<[[FOR_RESULT_PART]]> +; CHECK-NEXT: EMIT vp<[[FOR_RESULT:%.+]]> = extract-penultimate-element ir<%for.1.next> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VTC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph