Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 20 additions & 102 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1412,6 +1412,11 @@ class LoopVectorizationCostModel {
return InLoopReductions.contains(Phi);
}

/// Exposes the PHIs recorded in InLoopReductions. The set is handed out by
/// const reference, so no copy is made and callers cannot modify it.
const SmallPtrSetImpl<PHINode *> &getInLoopReductions() const {
  return InLoopReductions;
}

/// Returns true if the predicated reduction select should be used to set the
/// incoming value for the reduction phi.
bool usePredicatedReductionSelect() const {
Expand Down Expand Up @@ -7627,57 +7632,6 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
Consecutive, Reverse, *VPI, VPI->getDebugLoc());
}

/// Builds the VPWidenIntOrFpInductionRecipe that corresponds to \p PhiR.
///
/// The start value is taken from operand 0 of \p PhiR and the step from
/// \p IndDesc, materialized as a VPValue through
/// vputils::getOrCreateVPValueForSCEVExpr. If operand 1 of \p PhiR is an add
/// of \p PhiR and some other value, that add is rewired to use the same step
/// VPValue. \p SE and \p OrigLoop are only consulted by the loop-invariance
/// assertion.
static VPWidenIntOrFpInductionRecipe *
createWidenInductionRecipes(VPInstruction *PhiR,
                            const InductionDescriptor &IndDesc, VPlan &Plan,
                            ScalarEvolution &SE, Loop &OrigLoop) {
  assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
         "step must be loop invariant");

  VPValue *StartV = PhiR->getOperand(0);
  assert(Plan.getLiveIn(IndDesc.getStartValue()) == StartV &&
         "Start VPValue must match IndDesc's start value");

  // Copying the NoWrap and FastMath flags is always safe: even when the tail
  // is folded by masking, the masked-off lanes are never used.
  VPIRFlags IndFlags = vputils::getFlagsFromIndDesc(IndDesc);
  VPValue *StepV =
      vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());

  // Let the increment of the wide induction share the induction's own step
  // value. That makes induction increments recognizable directly in VPlan and
  // avoids redundant splats.
  using namespace llvm::VPlanPatternMatch;
  VPValue *BackedgeV = PhiR->getOperand(1);
  if (match(BackedgeV, m_Add(m_Specific(PhiR), m_VPValue())))
    BackedgeV->getDefiningRecipe()->setOperand(1, StepV);

  auto *IRPhi = cast<PHINode>(PhiR->getUnderlyingInstr());
  return new VPWidenIntOrFpInductionRecipe(IRPhi, StartV, StepV, &Plan.getVF(),
                                           IndDesc, IndFlags,
                                           PhiR->getDebugLoc());
}

/// Tries to build an induction recipe for the PHI underlying \p VPI.
///
/// Returns a wide int/fp induction recipe when Legal reports an integer or
/// floating-point induction descriptor for the PHI, a
/// VPWidenPointerInductionRecipe when it reports a pointer induction
/// descriptor, and nullptr when it reports neither.
VPHeaderPHIRecipe *
VPRecipeBuilder::tryToOptimizeInductionPHI(VPInstruction *VPI) {
  auto *IRPhi = cast<PHINode>(VPI->getUnderlyingInstr());

  // Integer and floating-point inductions get the recipe that produces both
  // their scalar and vector values.
  if (const auto *IntOrFpDesc = Legal->getIntOrFpInductionDescriptor(IRPhi))
    return createWidenInductionRecipes(VPI, *IntOrFpDesc, Plan, *PSE.getSE(),
                                       *OrigLoop);

  // Anything that is not a pointer induction either is not handled here.
  const auto *PtrDesc = Legal->getPointerInductionDescriptor(IRPhi);
  if (!PtrDesc)
    return nullptr;

  VPValue *StepV =
      vputils::getOrCreateVPValueForSCEVExpr(Plan, PtrDesc->getStep());
  return new VPWidenPointerInductionRecipe(IRPhi, VPI->getOperand(0), StepV,
                                           &Plan.getVFxUF(), *PtrDesc,
                                           VPI->getDebugLoc());
}

VPWidenIntOrFpInductionRecipe *
VPRecipeBuilder::tryToOptimizeInductionTruncate(VPInstruction *VPI,
VFRange &Range) {
Expand Down Expand Up @@ -8186,45 +8140,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
// First, check for specific widening recipes that deal with inductions, Phi
// nodes, calls and memory operations.
VPRecipeBase *Recipe;
if (auto *PhiR = dyn_cast<VPPhi>(R)) {
VPBasicBlock *Parent = PhiR->getParent();
[[maybe_unused]] VPRegionBlock *LoopRegionOf =
Parent->getEnclosingLoopRegion();
assert(LoopRegionOf && LoopRegionOf->getEntry() == Parent &&
"Non-header phis should have been handled during predication");
auto *Phi = cast<PHINode>(R->getUnderlyingInstr());
assert(R->getNumOperands() == 2 && "Must have 2 operands for header phis");
if ((Recipe = tryToOptimizeInductionPHI(PhiR)))
return Recipe;

VPHeaderPHIRecipe *PhiRecipe = nullptr;
assert((Legal->isReductionVariable(Phi) ||
Legal->isFixedOrderRecurrence(Phi)) &&
"can only widen reductions and fixed-order recurrences here");
VPValue *StartV = R->getOperand(0);
if (Legal->isReductionVariable(Phi)) {
const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi);
assert(RdxDesc.getRecurrenceStartValue() ==
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));

// If the PHI is used by a partial reduction, set the scale factor.
unsigned ScaleFactor =
getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
PhiRecipe = new VPReductionPHIRecipe(
Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi),
CM.useOrderedReductions(RdxDesc), ScaleFactor);
} else {
// TODO: Currently fixed-order recurrences are modeled as chains of
// first-order recurrences. If there are no users of the intermediate
// recurrences in the chain, the fixed order recurrence should be modeled
// directly, enabling more efficient codegen.
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
}
// Add backedge value.
PhiRecipe->addOperand(R->getOperand(1));
return PhiRecipe;
}
assert(!R->isPhi() && "only VPPhi nodes expected at this point");
assert(!R->isPhi() && "phis must be handled earlier");

auto *VPI = cast<VPInstruction>(R);
Instruction *Instr = R->getUnderlyingInstr();
Expand Down Expand Up @@ -8284,6 +8200,9 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction,
if (isa<VPReductionPHIRecipe>(BinOp) || isa<VPPartialReductionRecipe>(BinOp))
std::swap(BinOp, Accumulator);

if (auto *RedPhiR = dyn_cast<VPReductionPHIRecipe>(Accumulator))
RedPhiR->setVFScaleFactor(ScaleFactor);

assert(ScaleFactor ==
vputils::getVFScaleFactor(Accumulator->getDefiningRecipe()) &&
"all accumulators in chain must have same scale factor");
Expand Down Expand Up @@ -8331,6 +8250,12 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
OrigLoop, *LI, Legal->getWidestInductionType(),
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), PSE, &LVer);

// Create recipes for header phis.
VPlanTransforms::createHeaderPhiRecipes(
*VPlan0, *PSE.getSE(), *OrigLoop, Legal->getInductionVars(),
Legal->getReductionVars(), Legal->getFixedOrderRecurrences(),
CM.getInLoopReductions(), Hints.allowReordering());

auto MaxVFTimes2 = MaxVF * 2;
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
VFRange SubRange = {VF, MaxVFTimes2};
Expand Down Expand Up @@ -8451,25 +8376,18 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Mapping from VPValues in the initial plan to their widened VPValues. Needed
// temporarily to update created block masks.
DenseMap<VPValue *, VPValue *> Old2New;

// Now process all other blocks and instructions.
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
// Convert input VPInstructions to widened recipes.
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
auto *SingleDef = cast<VPSingleDefRecipe>(&R);
auto *UnderlyingValue = SingleDef->getUnderlyingValue();
// Skip recipes that do not need transforming, including canonical IV,
// wide canonical IV and VPInstructions without underlying values. The
// latter are added above for masking.
// FIXME: Migrate code relying on the underlying instruction from VPlan0
// to construct recipes below to not use the underlying instruction.
if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
&R) ||
(isa<VPInstruction>(&R) && !UnderlyingValue))
auto *SingleDef = dyn_cast<VPInstruction>(&R);
if (!SingleDef || !SingleDef->getUnderlyingValue())
continue;
assert(isa<VPInstruction>(&R) && UnderlyingValue && "unsupported recipe");

// TODO: Gradually replace uses of underlying instruction by analyses on
// VPlan.
Instruction *Instr = cast<Instruction>(UnderlyingValue);
Instruction *Instr = cast<Instruction>(SingleDef->getUnderlyingValue());
Builder.setInsertPoint(SingleDef);

// The stores with invariant address inside the loop will be deleted, and
Expand Down
4 changes: 0 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,6 @@ class VPRecipeBuilder {
/// recipe that takes an additional VPInstruction for the mask.
VPWidenMemoryRecipe *tryToWidenMemory(VPInstruction *VPI, VFRange &Range);

/// Check if an induction recipe should be constructed for \p VPI. If so build
/// and return it. If not, return null.
VPHeaderPHIRecipe *tryToOptimizeInductionPHI(VPInstruction *VPI);

/// Optimize the special case where the operand of \p VPI is a constant
/// integer induction variable.
VPWidenIntOrFpInductionRecipe *
Expand Down
15 changes: 13 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1134,7 +1134,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
OpcodeTy Opcode;

/// An optional name that can be used for the generated IR instruction.
const std::string Name;
std::string Name;

/// Returns true if we can generate a scalar for the first lane only if
/// needed.
Expand Down Expand Up @@ -1225,6 +1225,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
/// Returns the symbolic name assigned to the VPInstruction.
StringRef getName() const { return Name; }

/// Replaces the symbolic name of this VPInstruction with a copy of
/// \p NewName.
void setName(StringRef NewName) {
  Name = NewName.str();
}

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the VPInstruction to \p O.
Expand Down Expand Up @@ -2375,8 +2377,10 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)

/// Creates a copy of this recipe for the same underlying PHINode.
///
/// The constructor only receives operand 0, so operand 1 is attached to the
/// copy explicitly; without that the clone would end up with a single
/// operand.
VPFirstOrderRecurrencePHIRecipe *clone() override {
  auto *R = new VPFirstOrderRecurrencePHIRecipe(
      cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
  R->addOperand(getOperand(1));
  return R;
}

void execute(VPTransformState &State) override;
Expand Down Expand Up @@ -2446,6 +2450,13 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// Get the factor that the VF of this recipe's output should be scaled by.
unsigned getVFScaleFactor() const { return VFScaleFactor; }

/// Overrides the VF scale factor of this reduction phi with \p ScaleFactor.
/// Only factors strictly greater than 1 are accepted, which is enforced by
/// an assertion.
void setVFScaleFactor(unsigned ScaleFactor) {
  assert(ScaleFactor > 1 && "must set to scale factor > 1");
  VFScaleFactor = ScaleFactor;
}

/// Returns the number of incoming values, also number of incoming blocks.
/// Note that at the moment, VPWidenPointerInductionRecipe only has a single
/// incoming value, its start value.
Expand Down
101 changes: 101 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,15 @@ static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
Plan.getEntry()->swapSuccessors();

createExtractsForLiveOuts(Plan, MiddleVPBB);

VPBuilder ScalarPHBuilder(ScalarPH);
for (const auto &[PhiR, ScalarPhiR] : zip_equal(
drop_begin(HeaderVPBB->phis()), Plan.getScalarHeader()->phis())) {
auto *VectorPhiR = cast<VPPhi>(&PhiR);
auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
{VectorPhiR, VectorPhiR->getOperand(0)}, VectorPhiR->getDebugLoc());
cast<VPIRPhi>(&ScalarPhiR)->addOperand(ResumePhiR);
}
}

std::unique_ptr<VPlan>
Expand All @@ -566,6 +575,98 @@ VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
return VPlan0;
}

/// Builds the wide induction recipe that corresponds to \p PhiR, as described
/// by \p IndDesc.
///
/// A VPWidenPointerInductionRecipe is returned when \p IndDesc is of kind
/// IK_PtrInduction, and a VPWidenIntOrFpInductionRecipe otherwise. In both
/// cases the start value is operand 0 of \p PhiR and the step is obtained
/// through vputils::getOrCreateVPValueForSCEVExpr. \p SE and \p OrigLoop are
/// only consulted by the loop-invariance assertion.
static VPHeaderPHIRecipe *
createWidenInductionRecipe(VPPhi *PhiR, const InductionDescriptor &IndDesc,
                           VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop) {
  assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
         "step must be loop invariant");

  VPValue *StartV = PhiR->getOperand(0);
  assert(Plan.getLiveIn(IndDesc.getStartValue()) == StartV &&
         "Start VPValue must match IndDesc's start value");
  VPValue *StepV =
      vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
  auto *IRPhi = cast<PHINode>(PhiR->getUnderlyingInstr());

  // Pointer inductions need nothing beyond start and step.
  if (IndDesc.getKind() == InductionDescriptor::IK_PtrInduction)
    return new VPWidenPointerInductionRecipe(IRPhi, StartV, StepV,
                                             &Plan.getVFxUF(), IndDesc,
                                             PhiR->getDebugLoc());

  // Let the increment of the wide induction share the induction's own step
  // value. That makes induction increments recognizable directly in VPlan and
  // avoids redundant splats.
  using namespace llvm::VPlanPatternMatch;
  VPValue *BackedgeV = PhiR->getOperand(1);
  if (match(BackedgeV, m_Add(m_Specific(PhiR), m_VPValue())))
    BackedgeV->getDefiningRecipe()->setOperand(1, StepV);

  // Copying the NoWrap and FastMath flags is always safe: even when the tail
  // is folded by masking, the masked-off lanes are never used.
  VPIRFlags IndFlags = vputils::getFlagsFromIndDesc(IndDesc);

  return new VPWidenIntOrFpInductionRecipe(IRPhi, StartV, StepV, &Plan.getVF(),
                                           IndDesc, IndFlags,
                                           PhiR->getDebugLoc());
}

/// Replaces the VPPhi recipes at the start of the header block of \p Plan
/// with dedicated header-phi recipes.
///
/// A phi whose underlying PHINode is a key of \p Inductions becomes a wide
/// induction recipe, one found in \p Reductions becomes a
/// VPReductionPHIRecipe, and any other phi is expected to be in
/// \p FixedOrderRecurrences and becomes a VPFirstOrderRecurrencePHIRecipe.
/// \p InLoopReductions decides the in-loop flag of reduction phis, and a
/// reduction is treated as ordered only if \p AllowReordering is false and its
/// descriptor reports it as ordered. Canonical IV phis are skipped and
/// processing stops at the first recipe that is not a VPPhi.
void VPlanTransforms::createHeaderPhiRecipes(
    VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop,
    const MapVector<PHINode *, InductionDescriptor> &Inductions,
    const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
    const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
    const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {

  auto *HeaderVPBB = cast<VPBasicBlock>(
      Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());

  // Builds the recipe for a phi that is not an induction: a reduction phi if
  // a descriptor exists in Reductions, a first-order recurrence phi otherwise.
  // The second operand of the original phi is appended to the new recipe.
  auto CreateRecurrencePhi = [&](VPPhi *OldPhiR,
                                 PHINode *IRPhi) -> VPHeaderPHIRecipe * {
    VPValue *StartV = OldPhiR->getOperand(0);
    VPHeaderPHIRecipe *NewPhiR = nullptr;
    auto RdxIt = Reductions.find(IRPhi);
    if (RdxIt == Reductions.end()) {
      assert(FixedOrderRecurrences.contains(IRPhi) &&
             "can only widen reductions and fixed-order recurrences here");
      // TODO: Currently fixed-order recurrences are modeled as chains of
      // first-order recurrences. If there are no users of the intermediate
      // recurrences in the chain, the fixed order recurrence should be
      // modeled directly, enabling more efficient codegen.
      NewPhiR = new VPFirstOrderRecurrencePHIRecipe(IRPhi, *StartV);
    } else {
      const RecurrenceDescriptor &RdxDesc = RdxIt->second;
      assert(RdxDesc.getRecurrenceStartValue() ==
             IRPhi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));

      bool IsOrdered = !AllowReordering && RdxDesc.isOrdered();
      NewPhiR = new VPReductionPHIRecipe(
          IRPhi, RdxDesc.getRecurrenceKind(), *StartV,
          InLoopReductions.contains(IRPhi), IsOrdered);
    }
    NewPhiR->addOperand(OldPhiR->getOperand(1));
    return NewPhiR;
  };

  // Recipes are erased while iterating, hence the early-increment range.
  for (VPRecipeBase &R : make_early_inc_range(*HeaderVPBB)) {
    if (isa<VPCanonicalIVPHIRecipe>(&R))
      continue;
    auto *OldPhiR = dyn_cast<VPPhi>(&R);
    if (!OldPhiR)
      break;

    // TODO: Gradually replace uses of underlying instruction by analyses on
    // VPlan.
    auto *IRPhi = cast<PHINode>(OldPhiR->getUnderlyingInstr());
    assert(OldPhiR->getNumOperands() == 2 &&
           "Must have 2 operands for header phis");

    auto IndIt = Inductions.find(IRPhi);
    VPHeaderPHIRecipe *NewPhiR =
        IndIt == Inductions.end()
            ? CreateRecurrencePhi(OldPhiR, IRPhi)
            : createWidenInductionRecipe(OldPhiR, IndIt->second, Plan, SE,
                                         OrigLoop);

    // Put the new recipe in place of the old phi and hand over all users.
    NewPhiR->insertBefore(OldPhiR);
    OldPhiR->replaceAllUsesWith(NewPhiR);
    OldPhiR->eraseFromParent();
  }
}

void VPlanTransforms::handleEarlyExits(VPlan &Plan,
bool HasUncountableEarlyExit) {
auto *MiddleVPBB = cast<VPBasicBlock>(
Expand Down
Loading