Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 17 additions & 25 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7512,7 +7512,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,

VPValue *Mask = nullptr;
if (Legal->isMaskRequired(I))
Mask = getBlockInMask(Builder.getInsertBlock());
Mask = Builder.getInsertBlock()->getEntryMask();

// Determine if the pointer operand of the access is either consecutive or
// reverse consecutive.
Expand Down Expand Up @@ -7709,7 +7709,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
// all-true mask.
VPValue *Mask = nullptr;
if (Legal->isMaskRequired(CI))
Mask = getBlockInMask(Builder.getInsertBlock());
Mask = Builder.getInsertBlock()->getEntryMask();
else
Mask = Plan.getOrAddLiveIn(
ConstantInt::getTrue(IntegerType::getInt1Ty(CI->getContext())));
Expand Down Expand Up @@ -7751,7 +7751,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
// div/rem operation itself. Otherwise fall through to general handling below.
if (CM.isPredicatedInst(I)) {
SmallVector<VPValue *> Ops(Operands);
VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
VPValue *Mask = Builder.getInsertBlock()->getEntryMask();
VPValue *One =
Plan.getOrAddLiveIn(ConstantInt::get(I->getType(), 1u, false));
auto *SafeRHS = Builder.createSelect(Mask, Ops[1], One, I->getDebugLoc());
Expand Down Expand Up @@ -7833,7 +7833,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
// In case of predicated execution (due to tail-folding, or conditional
// execution, or both), pass the relevant mask.
if (Legal->isMaskRequired(HI->Store))
HGramOps.push_back(getBlockInMask(Builder.getInsertBlock()));
HGramOps.push_back(Builder.getInsertBlock()->getEntryMask());

return new VPHistogramRecipe(Opcode, HGramOps, HI->Store->getDebugLoc());
}
Expand Down Expand Up @@ -7887,7 +7887,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
// added initially. Masked replicate recipes will later be placed under an
// if-then construct to prevent side-effects. Generate recipes to compute
// the block mask for this region.
BlockInMask = getBlockInMask(Builder.getInsertBlock());
BlockInMask = Builder.getInsertBlock()->getEntryMask();
}

// Note that there is some custom logic to mark some intrinsics as uniform
Expand Down Expand Up @@ -8178,7 +8178,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
ReductionOpcode == Instruction::Sub) &&
"Expected an ADD or SUB operation for predicated partial "
"reductions (because the neutral element in the mask is zero)!");
Cond = getBlockInMask(Builder.getInsertBlock());
Cond = Builder.getInsertBlock()->getEntryMask();
VPValue *Zero =
Plan.getOrAddLiveIn(ConstantInt::get(Reduction->getType(), 0));
BinOp = Builder.createSelect(Cond, BinOp, Zero, Reduction->getDebugLoc());
Expand Down Expand Up @@ -8306,15 +8306,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// ---------------------------------------------------------------------------
// Predicate and linearize the top-level loop region.
// ---------------------------------------------------------------------------
auto BlockMaskCache = VPlanTransforms::introduceMasksAndLinearize(
*Plan, CM.foldTailByMasking());
VPlanTransforms::introduceMasksAndLinearize(*Plan, CM.foldTailByMasking());

// ---------------------------------------------------------------------------
// Construct wide recipes and apply predication for original scalar
// VPInstructions in the loop.
// ---------------------------------------------------------------------------
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
Builder, BlockMaskCache, LVer);
Builder, LVer);
RecipeBuilder.collectScaledReductions(Range);

// Scan the body of the loop in a topological order to visit each basic block
Expand All @@ -8325,9 +8324,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(

auto *MiddleVPBB = Plan->getMiddleBlock();
VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
// Mapping from VPValues in the initial plan to their widened VPValues. Needed
// temporarily to update created block masks.
DenseMap<VPValue *, VPValue *> Old2New;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
// Convert input VPInstructions to widened recipes.
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
Expand Down Expand Up @@ -8381,7 +8377,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
}
if (Recipe->getNumDefinedValues() == 1) {
SingleDef->replaceAllUsesWith(Recipe->getVPSingleValue());
Old2New[SingleDef] = Recipe->getVPSingleValue();
SingleDef->eraseFromParent();
} else {
assert(Recipe->getNumDefinedValues() == 0 &&
"Unexpected multidef recipe");
Expand All @@ -8390,14 +8386,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
}
}

// replaceAllUsesWith above may invalidate the block masks. Update them here.
// TODO: Include the masks as operands in the predicated VPlan directly
// to remove the need to keep a map of masks beyond the predication
// transform.
RecipeBuilder.updateBlockMaskCache(Old2New);
for (VPValue *Old : Old2New.keys())
Old->getDefiningRecipe()->eraseFromParent();

assert(isa<VPRegionBlock>(LoopRegion) &&
!LoopRegion->getEntryBasicBlock()->empty() &&
"entry block must be set to a VPRegionBlock having a non-empty entry "
Expand Down Expand Up @@ -8431,6 +8419,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Adjust the recipes for any inloop reductions.
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);

// Erase the block entry masks, since they're not used any longer, so that
// future transforms only deal with recipe VPUsers.
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))
VPBB->eraseEntryMask();

// Apply mandatory transformation to handle FP maxnum/minnum reduction with
// NaNs if possible, bail out otherwise.
if (!VPlanTransforms::runPass(VPlanTransforms::handleMaxMinNumReductions,
Expand Down Expand Up @@ -8521,9 +8514,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {

// Collect mapping of IR header phis to header phi recipes, to be used in
// addScalarResumePhis.
DenseMap<VPBasicBlock *, VPValue *> BlockMaskCache;
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
Builder, BlockMaskCache, nullptr /*LVer*/);
Builder, nullptr /*LVer*/);
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
if (isa<VPCanonicalIVPHIRecipe>(&R))
continue;
Expand Down Expand Up @@ -8681,7 +8673,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(

VPValue *CondOp = nullptr;
if (CM.blockNeedsPredicationForAnyReason(CurrentLinkI->getParent()))
CondOp = RecipeBuilder.getBlockInMask(CurrentLink->getParent());
CondOp = CurrentLink->getParent()->getEntryMask();

// TODO: Retrieve FMFs from recipes directly.
RecurrenceDescriptor RdxDesc = Legal->getRecurrenceDescriptor(
Expand Down Expand Up @@ -8729,7 +8721,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// different numbers of lanes. Partial reductions mask the input instead.
if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
!isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) {
VPValue *Cond = RecipeBuilder.getBlockInMask(PhiR->getParent());
VPValue *Cond = PhiR->getParent()->getEntryMask();
std::optional<FastMathFlags> FMFs =
PhiTy->isFloatingPointTy()
? std::make_optional(RdxDesc.getFastMathFlags())
Expand Down
24 changes: 1 addition & 23 deletions llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,6 @@ class VPRecipeBuilder {

VPBuilder &Builder;

/// The mask of each VPBB, generated earlier and used for predicating recipes
/// in VPBB.
/// TODO: remove by applying predication when generating the masks.
DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache;

// VPlan construction support: Hold a mapping from ingredients to
// their recipe.
DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
Expand Down Expand Up @@ -149,11 +144,9 @@ class VPRecipeBuilder {
LoopVectorizationLegality *Legal,
LoopVectorizationCostModel &CM,
PredicatedScalarEvolution &PSE, VPBuilder &Builder,
DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache,
LoopVersioning *LVer)
: Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
CM(CM), PSE(PSE), Builder(Builder), BlockMaskCache(BlockMaskCache),
LVer(LVer) {}
CM(CM), PSE(PSE), Builder(Builder), LVer(LVer) {}

std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
auto It = ScaledReductionMap.find(ExitInst);
Expand Down Expand Up @@ -182,12 +175,6 @@ class VPRecipeBuilder {
Ingredient2Recipe[I] = R;
}

/// Returns the *entry* mask for block \p VPBB or null if the mask is
/// all-true.
VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
return BlockMaskCache.lookup(VPBB);
}

/// Return the recipe created for given ingredient.
VPRecipeBase *getRecipe(Instruction *I) {
assert(Ingredient2Recipe.count(I) &&
Expand All @@ -211,15 +198,6 @@ class VPRecipeBuilder {
}
return Plan.getOrAddLiveIn(V);
}

void updateBlockMaskCache(DenseMap<VPValue *, VPValue *> &Old2New) {
for (auto &[_, V] : BlockMaskCache) {
if (auto *New = Old2New.lookup(V)) {
V->replaceAllUsesWith(New);
V = New;
}
}
}
};
} // end namespace llvm

Expand Down
25 changes: 21 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -3811,13 +3811,15 @@ struct CastInfo<VPPhiAccessors, const VPRecipeBase *>

/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
/// output IR instructions. All PHI-like recipes must come before any non-PHI
/// recipes. A block may also carry an operand holding its entry mask, i.e. the
/// mask under which the block is entered. This operand is only used early
/// during VPlan construction.
class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase, protected VPUser {
friend class VPlan;

/// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
: VPBlockBase(VPBasicBlockSC, Name.str()) {
: VPBlockBase(VPBasicBlockSC, Name.str()), VPUser(VPUBlockSC) {
if (Recipe)
appendRecipe(Recipe);
}
Expand All @@ -3830,7 +3832,7 @@ class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
RecipeListTy Recipes;

VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
: VPBlockBase(BlockSC, Name.str()) {}
: VPBlockBase(BlockSC, Name.str()), VPUser(VPUBlockSC) {}

public:
~VPBasicBlock() override {
Expand Down Expand Up @@ -3941,6 +3943,21 @@ class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
/// second predecessor is the exiting block of the region.
const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;

/// Return the mask under which this block is entered, i.e. operand 0 of the
/// block when it has one. A block without operands yields nullptr, which
/// stands for an all-ones mask.
VPValue *getEntryMask() const {
  if (getNumOperands() == 0)
    return nullptr;
  return getOperand(0);
}

/// Set the entry mask of this block to \p M: used by VPlanPredicator, when
/// predicating blocks. The mask is kept as operand 0: an existing mask is
/// replaced, otherwise \p M is added as the first operand.
///
/// \p M must not be null. An all-ones mask is represented by the absence of
/// the operand (see getEntryMask()), so callers with an all-ones mask should
/// simply not call this; use eraseEntryMask() to drop a mask that was set
/// earlier.
void setEntryMask(VPValue *M) {
  assert(M && "all-ones entry mask is modelled by not setting a mask");
  if (getNumOperands() != 0)
    setOperand(0, M);
  else
    addOperand(M);
}

/// Drop the entry mask of this block by erasing the operands it holds.
void eraseEntryMask() {
  eraseOperands();
}

protected:
/// Execute the recipes in the IR basic block \p BB.
void executeRecipes(VPTransformState *State, BasicBlock *BB);
Expand Down
36 changes: 6 additions & 30 deletions llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,32 +32,15 @@ class VPPredicator {
using EdgeMaskCacheTy =
DenseMap<std::pair<const VPBasicBlock *, const VPBasicBlock *>,
VPValue *>;
using BlockMaskCacheTy = DenseMap<VPBasicBlock *, VPValue *>;
EdgeMaskCacheTy EdgeMaskCache;

BlockMaskCacheTy BlockMaskCache;

/// Create an edge mask for every destination of cases and/or default.
void createSwitchEdgeMasks(VPInstruction *SI);

/// Computes and returns the predicate of the edge between \p Src and \p Dst,
/// possibly inserting new recipes at \p Dst (using Builder's insertion point).
VPValue *createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst);

/// Returns the *entry* mask for \p VPBB.
VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
return BlockMaskCache.lookup(VPBB);
}

/// Record \p Mask as the *entry* mask of \p VPBB, which is expected to not
/// already have a mask.
void setBlockInMask(VPBasicBlock *VPBB, VPValue *Mask) {
// TODO: Include the masks as operands in the predicated VPlan directly to
// avoid keeping the map of masks beyond the predication transform.
assert(!getBlockInMask(VPBB) && "Mask already set");
BlockMaskCache[VPBB] = Mask;
}

/// Record \p Mask as the mask of the edge from \p Src to \p Dst. The edge is
/// expected to not have a mask already.
VPValue *setEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst,
Expand All @@ -82,8 +65,6 @@ class VPPredicator {

/// Convert phi recipes in \p VPBB to VPBlendRecipes.
void convertPhisToBlends(VPBasicBlock *VPBB);

const BlockMaskCacheTy getBlockMaskCache() const { return BlockMaskCache; }
};
} // namespace

Expand All @@ -95,7 +76,7 @@ VPValue *VPPredicator::createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst) {
if (EdgeMask)
return EdgeMask;

VPValue *SrcMask = getBlockInMask(Src);
VPValue *SrcMask = Src->getEntryMask();

// If there's a single successor, there's no terminator recipe.
if (Src->getNumSuccessors() == 1)
Expand Down Expand Up @@ -140,7 +121,6 @@ VPValue *VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
VPValue *EdgeMask = createEdgeMask(cast<VPBasicBlock>(Predecessor), VPBB);
if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is
// too.
setBlockInMask(VPBB, EdgeMask);
return EdgeMask;
}

Expand All @@ -152,15 +132,13 @@ VPValue *VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
BlockMask = Builder.createOr(BlockMask, EdgeMask, {});
}

setBlockInMask(VPBB, BlockMask);
VPBB->setEntryMask(BlockMask);
return BlockMask;
}

void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
if (!FoldTail) {
setBlockInMask(HeaderVPBB, nullptr);
if (!FoldTail)
return;
}

// Introduce the early-exit compare IV <= BTC to form header block mask.
// This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
Expand All @@ -175,7 +153,7 @@ void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {

VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
VPValue *BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
setBlockInMask(HeaderVPBB, BlockMask);
HeaderVPBB->setEntryMask(BlockMask);
}

void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
Expand All @@ -201,7 +179,7 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {

// We need to handle 2 separate cases below for all entries in Dst2Compares,
// which excludes destinations matching the default destination.
VPValue *SrcMask = getBlockInMask(Src);
VPValue *SrcMask = Src->getEntryMask();
VPValue *DefaultMask = nullptr;
for (const auto &[Dst, Conds] : Dst2Compares) {
// 1. Dst is not the default destination. Dst is reached if any of the
Expand Down Expand Up @@ -261,8 +239,7 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
}
}

DenseMap<VPBasicBlock *, VPValue *>
VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
void VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
Expand Down Expand Up @@ -301,5 +278,4 @@ VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {

PrevVPBB = VPBB;
}
return Predicator.getBlockMaskCache();
}
7 changes: 2 additions & 5 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -353,11 +353,8 @@ struct VPlanTransforms {
/// Predicate and linearize the control-flow in the only loop region of
/// \p Plan. If \p FoldTail is true, create a mask guarding the loop
/// header, otherwise use all-true for the header mask. Masks for blocks are
/// added to a block-to-mask map which is returned in order to be used later
/// for wide recipe construction. This argument is temporary and will be
/// removed in the future.
static DenseMap<VPBasicBlock *, VPValue *>
introduceMasksAndLinearize(VPlan &Plan, bool FoldTail);
/// attached to the blocks themselves as their entry masks.
static void introduceMasksAndLinearize(VPlan &Plan, bool FoldTail);

/// Add branch weight metadata, if the \p Plan's middle block is terminated by
/// a BranchOnCond recipe.
Expand Down
Loading