17 changes: 9 additions & 8 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -620,14 +620,15 @@ class LoopVectorizationPlanner {
/// legal to vectorize the loop. This method creates VPlans using VPRecipes.
void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);

// Adjust the recipes for reductions. For in-loop reductions the chain of
// instructions leading from the loop exit instr to the phi need to be
// converted to reductions, with one operand being vector and the other being
// the scalar reduction chain. For other reductions, a select is introduced
// between the phi and users outside the vector region when folding the tail.
void adjustRecipesForReductions(VPlanPtr &Plan,
VPRecipeBuilder &RecipeBuilder,
ElementCount MinVF);
/// Introduce recipes to compute the final reduction result
/// (ComputeFindIVResult, ComputeAnyOfResult, or ComputeReductionResult,
/// depending on the reduction kind) in the middle block. For regular
/// reductions, a select is introduced between the phi and its users outside
/// the vector region when folding the tail.
void introduceReductionResultComputation(VPlanPtr &Plan,
VPRecipeBuilder &RecipeBuilder,
ElementCount MinVF);

/// Attach the runtime checks of \p RTChecks to \p Plan.
void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
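For context, a minimal scalar sketch of why tail folding requires the select described above; this is an illustrative assumption, not code from the patch, and reduceWithFoldedTail is a hypothetical name:

// Scalar analogue of a tail-folded reduction: iterations past the original
// trip count (the masked-off lanes) must leave the accumulator unchanged,
// which is what the introduced select models.
static int reduceWithFoldedTail(const int *A, int N, int PaddedN) {
  int Sum = 0;                        // reduction phi start value
  for (int I = 0; I != PaddedN; ++I)  // PaddedN: N rounded up to VF * UF
    Sum = I < N ? Sum + A[I] : Sum;   // select(active-lane, new, old)
  return Sum;
}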
306 changes: 32 additions & 274 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Large diffs are not rendered by default.

5 changes: 0 additions & 5 deletions llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -94,11 +94,6 @@ class VPRecipeBuilder {
/// recipe that takes an additional VPInstruction for the mask.
VPWidenMemoryRecipe *tryToWidenMemory(VPInstruction *VPI, VFRange &Range);

/// Check if an induction recipe should be constructed for \p VPI. If so build
/// and return it. If not, return null.
VPHeaderPHIRecipe *tryToOptimizeInductionPHI(VPInstruction *VPI,
VFRange &Range);

/// Optimize the special case where the operand of \p VPI is a constant
/// integer induction variable.
VPWidenIntOrFpInductionRecipe *
21 changes: 11 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1134,7 +1134,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
OpcodeTy Opcode;

/// An optional name that can be used for the generated IR instruction.
const std::string Name;
std::string Name;

/// Returns true if we can generate a scalar for the first lane only if
/// needed.
@@ -1225,6 +1225,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
/// Returns the symbolic name assigned to the VPInstruction.
StringRef getName() const { return Name; }

void setName(StringRef NewName) { Name = NewName.str(); }

protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the VPInstruction to \p O.
@@ -2281,19 +2283,15 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe,
};

class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
bool IsScalarAfterVectorization;

public:
/// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
/// Start and the number of elements unrolled \p NumUnrolledElems, typically
/// VF*UF.
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
VPValue *NumUnrolledElems,
const InductionDescriptor &IndDesc,
bool IsScalarAfterVectorization, DebugLoc DL)
const InductionDescriptor &IndDesc, DebugLoc DL)
: VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
Step, IndDesc, DL),
IsScalarAfterVectorization(IsScalarAfterVectorization) {
Step, IndDesc, DL) {
addOperand(NumUnrolledElems);
}

@@ -2302,8 +2300,7 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
VPWidenPointerInductionRecipe *clone() override {
return new VPWidenPointerInductionRecipe(
cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
getOperand(2), getInductionDescriptor(), IsScalarAfterVectorization,
getDebugLoc());
getOperand(2), getInductionDescriptor(), getDebugLoc());
}

VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
@@ -2380,8 +2377,10 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)

VPFirstOrderRecurrencePHIRecipe *clone() override {
return new VPFirstOrderRecurrencePHIRecipe(
auto *R = new VPFirstOrderRecurrencePHIRecipe(
cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
R->addOperand(getOperand(1));
return R;
}

void execute(VPTransformState &State) override;
@@ -2451,6 +2450,8 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// Get the factor that the VF of this recipe's output should be scaled by.
unsigned getVFScaleFactor() const { return VFScaleFactor; }

void setVFScaleFactor(unsigned ScaleFactor) { VFScaleFactor = ScaleFactor; }

/// Returns the number of incoming values, also number of incoming blocks.
/// Note that at the moment, VPWidenPointerInductionRecipe only has a single
/// incoming value, its start value.
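A minimal usage sketch for the VPlan.h changes above, assuming only the signatures visible in this diff; the helper name makePtrInduction and the surrounding context are hypothetical. The pointer-induction recipe no longer threads an IsScalarAfterVectorization flag through its constructor, and the new setters let later transforms rename a VPInstruction or adjust a reduction phi's VF scale factor in place.

#include "VPlan.h"
using namespace llvm;

// Hypothetical helper, illustrative only: NumUnrolledElems is taken from the
// plan and is typically VF * UF.
static VPWidenPointerInductionRecipe *
makePtrInduction(VPlan &Plan, PHINode *Phi, VPValue *Start, VPValue *Step,
                 const InductionDescriptor &IndDesc, DebugLoc DL) {
  return new VPWidenPointerInductionRecipe(Phi, Start, Step, &Plan.getVFxUF(),
                                           IndDesc, DL);
}

// The new mutators could then be used along the lines of (assumed usage):
//   SomeVPI->setName("ptr.ind");   // rename the generated IR instruction
//   RedPhiR->setVFScaleFactor(4);  // record partial-reduction scaling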
268 changes: 268 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -13,6 +13,7 @@

#include "LoopVectorizationPlanner.h"
#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
@@ -554,6 +555,15 @@ static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
Plan.getEntry()->swapSuccessors();

createExtractsForLiveOuts(Plan, MiddleVPBB);

VPBuilder ScalarPHBuilder(ScalarPH);
for (const auto &[PhiR, ScalarPhiR] : zip_equal(
drop_begin(HeaderVPBB->phis()), Plan.getScalarHeader()->phis())) {
auto *VectorPhiR = cast<VPPhi>(&PhiR);
auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
{VectorPhiR, VectorPhiR->getOperand(0)}, VectorPhiR->getDebugLoc());
cast<VPIRPhi>(&ScalarPhiR)->addOperand(ResumePhiR);
}
}

std::unique_ptr<VPlan>
@@ -566,6 +576,264 @@ VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
return VPlan0;
}

/// Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe
/// for \p Phi based on \p IndDesc.
static VPHeaderPHIRecipe *
createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR,
const InductionDescriptor &IndDesc, VPlan &Plan,
ScalarEvolution &SE, Loop &OrigLoop) {
assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
"step must be loop invariant");

VPValue *Start = PhiR->getOperand(0);
assert(Plan.getLiveIn(IndDesc.getStartValue()) == Start &&
"Start VPValue must match IndDesc's start value");
VPValue *Step =
vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());

if (IndDesc.getKind() == InductionDescriptor::IK_PtrInduction)
return new VPWidenPointerInductionRecipe(Phi, Start, Step, &Plan.getVFxUF(),
IndDesc, PhiR->getDebugLoc());

// It is always safe to copy over the NoWrap and FastMath flags. In
// particular, when folding tail by masking, the masked-off lanes are never
// used, so it is safe.
VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc);

// Update wide induction increments to use the same step as the corresponding
// wide induction. This enables detecting induction increments directly in
// VPlan and removes redundant splats.
using namespace llvm::VPlanPatternMatch;
if (match(PhiR->getOperand(1), m_Add(m_Specific(PhiR), m_VPValue())))
PhiR->getOperand(1)->getDefiningRecipe()->setOperand(1, Step);

return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
IndDesc, Flags, PhiR->getDebugLoc());
}

void VPlanTransforms::createHeaderPhiRecipes(
VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop,
const MapVector<PHINode *, InductionDescriptor> &Inductions,
const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {

VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());

for (VPRecipeBase &R : make_early_inc_range(*HeaderVPBB)) {
if (isa<VPCanonicalIVPHIRecipe>(&R))
continue;
auto *PhiR = dyn_cast<VPPhi>(&R);
if (!PhiR)
break;

// TODO: Gradually replace uses of underlying instruction by analyses on
// VPlan.
auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
assert(PhiR->getNumOperands() == 2 &&
"Must have 2 operands for header phis");

VPHeaderPHIRecipe *HeaderPhiR = nullptr;
auto InductionIt = Inductions.find(Phi);
if (InductionIt != Inductions.end()) {
HeaderPhiR = createWidenInductionRecipe(Phi, PhiR, InductionIt->second,
Plan, SE, OrigLoop);
} else {
VPValue *Start = PhiR->getOperand(0);
auto ReductionIt = Reductions.find(Phi);
if (ReductionIt != Reductions.end()) {
const RecurrenceDescriptor &RdxDesc = ReductionIt->second;
assert(RdxDesc.getRecurrenceStartValue() ==
Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));

bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
HeaderPhiR = new VPReductionPHIRecipe(Phi, RdxDesc.getRecurrenceKind(),
*Start, InLoopReductions.contains(Phi),
UseOrderedReductions);
} else {
assert(FixedOrderRecurrences.contains(Phi) &&
"can only widen reductions and fixed-order recurrences here");
// TODO: Currently fixed-order recurrences are modeled as chains of
// first-order recurrences. If there are no users of the intermediate
// recurrences in the chain, the fixed order recurrence should be
// modeled directly, enabling more efficient codegen.
HeaderPhiR = new VPFirstOrderRecurrencePHIRecipe(Phi, *Start);
}
HeaderPhiR->addOperand(PhiR->getOperand(1));
}
HeaderPhiR->insertBefore(PhiR);
PhiR->replaceAllUsesWith(HeaderPhiR);
PhiR->eraseFromParent();
}
}

void VPlanTransforms::createVPReductionRecipesForInLoopReductions(
VPlan &Plan, const DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache,
const DenseSet<BasicBlock *> &BlocksNeedingPredication,
ElementCount MinVF) {
VPTypeAnalysis TypeInfo(Plan);
VPRegionBlock *VectorLoopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock();
VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
SmallVector<VPRecipeBase *> ToDelete;

for (VPRecipeBase &R : Header->phis()) {
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
continue;

RecurKind Kind = PhiR->getRecurrenceKind();
assert(
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
!RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
"AnyOf and FindIV reductions are not allowed for in-loop reductions");

bool IsFPRecurrence =
RecurrenceDescriptor::isFloatingPointRecurrenceKind(Kind);
FastMathFlags FMFs =
IsFPRecurrence ? FastMathFlags::getFast() : FastMathFlags();

// Collect the chain of "link" recipes for the reduction starting at PhiR.
SetVector<VPSingleDefRecipe *> Worklist;
Worklist.insert(PhiR);
for (unsigned I = 0; I != Worklist.size(); ++I) {
VPSingleDefRecipe *Cur = Worklist[I];
for (VPUser *U : Cur->users()) {
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
assert((UserRecipe->getParent() == MiddleVPBB ||
UserRecipe->getParent() == Plan.getScalarPreheader()) &&
"U must be either in the loop region, the middle block or the "
"scalar preheader.");
continue;
}

// Stores using instructions will be sunk later.
if (match(UserRecipe, m_VPInstruction<Instruction::Store>()))
continue;
Worklist.insert(UserRecipe);
}
}

// Visit operation "Links" along the reduction chain top-down starting from
// the phi until LoopExitValue. We keep track of the previous item
// (PreviousLink) to tell which of the two operands of a Link will remain
// scalar and which will be reduced. For minmax by select(cmp), Link will be
// the select instructions. Blend recipes of in-loop reduction phi's will
// get folded to their non-phi operand, as the reduction recipe handles the
// condition directly.
VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
for (VPSingleDefRecipe *CurrentLink : drop_begin(Worklist)) {
if (auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink)) {
assert(Blend->getNumIncomingValues() == 2 &&
"Blend must have 2 incoming values");
if (Blend->getIncomingValue(0) == PhiR) {
Blend->replaceAllUsesWith(Blend->getIncomingValue(1));
} else {
assert(Blend->getIncomingValue(1) == PhiR &&
"PhiR must be an operand of the blend");
Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
}
continue;
}

if (IsFPRecurrence) {
FastMathFlags CurFMF =
cast<VPRecipeWithIRFlags>(CurrentLink)->getFastMathFlags();
if (match(CurrentLink, m_Select(m_VPValue(), m_VPValue(), m_VPValue())))
CurFMF |= cast<VPRecipeWithIRFlags>(CurrentLink->getOperand(0))
->getFastMathFlags();
FMFs &= CurFMF;
}

Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();

// Index of the first operand which holds a non-mask vector operand.
unsigned IndexOfFirstOperand;
// Recognize a call to the llvm.fmuladd intrinsic.
bool IsFMulAdd = Kind == RecurKind::FMulAdd &&
RecurrenceDescriptor::isFMulAddIntrinsic(CurrentLinkI);
VPValue *VecOp;
VPBasicBlock *LinkVPBB = CurrentLink->getParent();
if (IsFMulAdd) {
assert(CurrentLink->getOperand(2) == PreviousLink &&
"expected a call where the previous link is the added operand");

// If the instruction is a call to the llvm.fmuladd intrinsic then we
// need to create an fmul recipe (multiplying the first two operands of
// the fmuladd together) to use as the vector operand for the fadd
// reduction.
VPInstruction *FMulRecipe = new VPInstruction(
Instruction::FMul,
{CurrentLink->getOperand(0), CurrentLink->getOperand(1)},
CurrentLinkI->getFastMathFlags());
LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
VecOp = FMulRecipe;
} else if (PhiR->isInLoop() && Kind == RecurKind::AddChainWithSubs &&
match(CurrentLink, m_Sub(m_VPValue(), m_VPValue()))) {
Type *PhiTy = TypeInfo.inferScalarType(PhiR);
auto *Zero = Plan.getConstantInt(PhiTy, 0);
VPWidenRecipe *Sub = new VPWidenRecipe(
Instruction::Sub, {Zero, CurrentLink->getOperand(1)}, {},
VPIRMetadata(), CurrentLinkI->getDebugLoc());
Sub->setUnderlyingValue(CurrentLinkI);
LinkVPBB->insert(Sub, CurrentLink->getIterator());
VecOp = Sub;
} else {
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
if (match(CurrentLink, m_Cmp(m_VPValue(), m_VPValue())))
continue;
assert(match(CurrentLink,
m_Select(m_VPValue(), m_VPValue(), m_VPValue())) &&
"must be a select recipe");
IndexOfFirstOperand = 1;
} else {
IndexOfFirstOperand = 0;
}
// Note that for non-commutable operands (cmp-selects), the semantics of
// the cmp-select are captured in the recurrence kind.
unsigned VecOpId =
CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLink
? IndexOfFirstOperand + 1
: IndexOfFirstOperand;
VecOp = CurrentLink->getOperand(VecOpId);
assert(VecOp != PreviousLink &&
CurrentLink->getOperand(CurrentLink->getNumOperands() - 1 -
(VecOpId - IndexOfFirstOperand)) ==
PreviousLink &&
"PreviousLink must be the operand other than VecOp");
}

// Get block mask from BlockMaskCache if the block needs predication.
VPValue *CondOp = nullptr;
if (BlocksNeedingPredication.contains(CurrentLinkI->getParent())) {
auto MaskIt = BlockMaskCache.find(LinkVPBB);
if (MaskIt != BlockMaskCache.end())
CondOp = MaskIt->second;
}

auto *RedRecipe = new VPReductionRecipe(
Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
PhiR->isOrdered(), CurrentLinkI->getDebugLoc());
// Append the recipe to the end of the VPBasicBlock because we need to
// ensure that it comes after all of its inputs, including CondOp.
// Delete CurrentLink as it will be invalid if its operand is replaced
// with a reduction defined at the bottom of the block in the next link.
if (LinkVPBB->getNumSuccessors() == 0)
RedRecipe->insertBefore(&*std::prev(std::prev(LinkVPBB->end())));
else
LinkVPBB->appendRecipe(RedRecipe);

CurrentLink->replaceAllUsesWith(RedRecipe);
ToDelete.push_back(CurrentLink);
PreviousLink = RedRecipe;
}
}

for (VPRecipeBase *R : ToDelete)
R->eraseFromParent();
}

void VPlanTransforms::handleEarlyExits(VPlan &Plan,
bool HasUncountableEarlyExit) {
auto *MiddleVPBB = cast<VPBasicBlock>(
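To ground the two new transforms in VPlanConstruction.cpp, here is a hypothetical input loop (an illustrative assumption, not taken from the patch) containing each header-phi kind that createHeaderPhiRecipes classifies, plus a reduction chain of the shape createVPReductionRecipesForInLoopReductions walks:

// Illustrative source only; names and values are assumptions.
float headerPhiExample(const float *A, int N) {
  float Sum = 0.0f;  // reduction phi: fadd chain rooted at this start value
  float Prev = 0.0f; // fixed-order recurrence phi: carries A[I] from the
                     // previous iteration
  float Out = 0.0f;
  for (int I = 0; I < N; ++I) { // I: integer induction phi
    Out += Prev;                // user of the recurrence value
    Prev = A[I];
    Sum += A[I] * 2.0f;         // reduction link; the fmul/fadd pair may be
                                // recognized as llvm.fmuladd and rewritten as
                                // an FMul feeding a VPReductionRecipe
  }
  return Sum + Out;
}

For an in-loop reduction, the transform walks from the Sum phi through each link, replacing the link with a VPReductionRecipe whose scalar operand is the previous link and whose vector operand is the remaining input.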