Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8317,6 +8317,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
if (auto Plan = tryToBuildVPlanWithVPRecipes(
std::unique_ptr<VPlan>(VPlan0->duplicate()), SubRange, &LVer)) {
// Now optimize the initial VPlan.
VPlanTransforms::hoistPredicatedLoads(*Plan, *PSE.getSE(), OrigLoop);
VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
*Plan, CM.getMinimalBitwidths());
VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
Expand Down
146 changes: 146 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3974,6 +3974,152 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
}
}

// Returns the metadata shared by every load in Loads. The result is
// initialized from the VPIRMetadata of the first load and then intersected
// with each remaining load in turn.
// Loads must be non-empty: Loads.front() is read unconditionally.
static VPIRMetadata getCommonLoadMetadata(ArrayRef<VPReplicateRecipe *> Loads) {
// Seed with the first load, then narrow with every other load in the group.
VPIRMetadata CommonMetadata = *Loads.front();
for (VPReplicateRecipe *Load : drop_begin(Loads))
CommonMetadata.intersect(*Load);
return CommonMetadata;
Comment on lines +3979 to +3982
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good to factor this into a VPIRMetadata::intersect_range?

}

// Returns true if Load can be hoisted across the chain of blocks that starts
// at FirstBB and is followed through getSingleSuccessor() up to LastBB.
//
// Returns false when:
//  * no memory location can be computed for Load, or its AA tags carry no
//    alias scope metadata, or
//  * some recipe in the visited blocks may write to memory and either has no
//    computable memory location or may alias with the scopes of Load
//    according to ScopedNoAliasAAResult::mayAliasInScopes.
//
// The check is purely metadata based; it relies on scoped noalias metadata
// only.
static bool canHoistLoadWithNoAliasCheck(VPReplicateRecipe *Load,
VPBasicBlock *FirstBB,
VPBasicBlock *LastBB) {
// Get the load's memory location and check if it aliases with any stores
// using scoped noalias metadata.
auto LoadLoc = vputils::getMemoryLocation(*Load);
if (!LoadLoc || !LoadLoc->AATags.Scope)
return false;

const AAMDNodes &LoadAA = LoadLoc->AATags;
// Walk the chain starting at FirstBB. In this version of the loop the
// recipes of LastBB are scanned too, because the exit test on LastBB sits
// after the recipe scan. The walk also ends if a block has no single
// successor.
for (VPBlockBase *Block = FirstBB; Block;
Block = Block->getSingleSuccessor()) {
// This function assumes a simple linear chain of blocks. If there are
// multiple successors, we would need more complex analysis.
assert(Block->getNumSuccessors() <= 1 &&
"Expected at most one successor in block chain");
auto *VPBB = cast<VPBasicBlock>(Block);
for (VPRecipeBase &R : *VPBB) {
// Only recipes that may write to memory can conflict with the load.
if (R.mayWriteToMemory()) {
auto Loc = vputils::getMemoryLocation(R);
// Bail out if we can't get the location or if the scoped noalias
// metadata indicates potential aliasing.
if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes(
LoadAA.Scope, Loc->AATags.NoAlias))
return false;
}
}

if (Block == LastBB)
break;
Comment on lines +3997 to +4016
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for (VPBlockBase *Block = FirstBB; Block;
Block = Block->getSingleSuccessor()) {
// This function assumes a simple linear chain of blocks. If there are
// multiple successors, we would need more complex analysis.
assert(Block->getNumSuccessors() <= 1 &&
"Expected at most one successor in block chain");
auto *VPBB = cast<VPBasicBlock>(Block);
for (VPRecipeBase &R : *VPBB) {
if (R.mayWriteToMemory()) {
auto Loc = vputils::getMemoryLocation(R);
// Bail out if we can't get the location or if the scoped noalias
// metadata indicates potential aliasing.
if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes(
LoadAA.Scope, Loc->AATags.NoAlias))
return false;
}
}
if (Block == LastBB)
break;
for (VPBlockBase *Block = FirstBB; Block && Block != LastBB;
Block = Block->getSingleSuccessor()) {
// This function assumes a simple linear chain of blocks. If there are
// multiple successors, we would need more complex analysis.
assert(Block->getNumSuccessors() <= 1 &&
"Expected at most one successor in block chain");
auto *VPBB = cast<VPBasicBlock>(Block);
for (VPRecipeBase &R : *VPBB) {
if (R.mayWriteToMemory()) {
auto Loc = vputils::getMemoryLocation(R);
// Bail out if we can't get the location or if the scoped noalias
// metadata indicates potential aliasing.
if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes(
LoadAA.Scope, Loc->AATags.NoAlias))
return false;
}
}

}
return true;
}
Comment on lines +3985 to +4019
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you put this function above cannotHoistOrSinkRecipe, so we can also try to use it there?


// Replaces groups of predicated replicate loads from the same address with a
// single unpredicated load.
//
// Steps:
//  1. Visit the blocks reached by a shallow depth-first walk from the entry of
//     the vector loop region and bucket every predicated VPReplicateRecipe
//     load by the SCEV of its address (operand 0). Loads whose address SCEV
//     could not be computed are ignored.
//  2. Within a bucket, group loads that have the same inferred scalar type.
//     A group is kept only if at least one member has a mask that is the
//     logical NOT of the mask of the first member, or vice versa.
//  3. For each kept group, sort by dominance, check the earliest load with
//     canHoistLoadWithNoAliasCheck over the blocks from the earliest to the
//     last load, and if that succeeds insert an unpredicated load before the
//     earliest load, redirect all users of the group to it and erase the
//     original loads.
//
// SE and L are forwarded to vputils::getSCEVExprForVPValue to compute the
// address SCEVs.
void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
const Loop *L) {
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPTypeAnalysis TypeInfo(Plan);
VPDominatorTree VPDT(Plan);

// Group predicated loads by their address SCEV.
MapVector<const SCEV *, SmallVector<VPReplicateRecipe *>> LoadsByAddress;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this not be a SmallPtrSet? Do the SCEVs need to be ordered?

for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) {
auto *VPBB = cast<VPBasicBlock>(Block);
for (VPRecipeBase &R : *VPBB) {
// Only predicated replicate recipes wrapping a load are candidates.
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
if (!RepR || RepR->getOpcode() != Instruction::Load ||
!RepR->isPredicated())
continue;

// Operand 0 of the load is used as its address.
VPValue *Addr = RepR->getOperand(0);
const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L);
if (!isa<SCEVCouldNotCompute>(AddrSCEV))
LoadsByAddress[AddrSCEV].push_back(RepR);
}
}

// For each address, collect loads with complementary masks, sort by
// dominance, and use the earliest load.
for (auto &[Addr, Loads] : LoadsByAddress) {
// A single load has nothing to be merged with.
if (Loads.size() < 2)
continue;

// Collect groups of loads with complementary masks.
SmallVector<SmallVector<VPReplicateRecipe *, 4>> LoadGroups;
// Entries of Loads are set to nullptr once they have been claimed by a
// group, so that no load ends up in more than one group.
for (VPReplicateRecipe *&LoadI : Loads) {
if (!LoadI)
continue;

VPValue *MaskI = LoadI->getMask();
Type *TypeI = TypeInfo.inferScalarType(LoadI);
SmallVector<VPReplicateRecipe *, 4> Group;
Group.push_back(LoadI);
LoadI = nullptr;

// Find all loads with the same type.
for (VPReplicateRecipe *&LoadJ : Loads) {
if (!LoadJ)
continue;

Type *TypeJ = TypeInfo.inferScalarType(LoadJ);
if (TypeI == TypeJ) {
Group.push_back(LoadJ);
LoadJ = nullptr;
}
}

// Check if any other load in the group has a mask complementary to the
// mask of the first load (MaskI), that is MaskI == NOT(MaskJ) or
// MaskJ == NOT(MaskI). Pairs that do not involve the first load are not
// compared. A group that fails this check is dropped, and its loads stay
// claimed (nulled in Loads), so they are not reconsidered.
bool HasComplementaryMask =
any_of(drop_begin(Group), [MaskI](VPReplicateRecipe *Load) {
VPValue *MaskJ = Load->getMask();
return match(MaskI, m_Not(m_Specific(MaskJ))) ||
match(MaskJ, m_Not(m_Specific(MaskI)));
});

if (HasComplementaryMask)
LoadGroups.push_back(std::move(Group));
}

// For each group, check memory dependencies and hoist the earliest load.
for (auto &Group : LoadGroups) {
// Sort loads by dominance order, with earliest (most dominating) first.
// NOTE(review): properlyDominates is used directly as the sort comparator;
// this presumes every pair of loads in a group is ordered by dominance.
// Confirm, since a sort comparator must be a strict weak ordering.
sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
return VPDT.properlyDominates(A, B);
});

VPReplicateRecipe *EarliestLoad = Group.front();
VPBasicBlock *FirstBB = EarliestLoad->getParent();
VPBasicBlock *LastBB = Group.back()->getParent();

// Check that the load doesn't alias with stores between first and last.
if (!canHoistLoadWithNoAliasCheck(EarliestLoad, FirstBB, LastBB))
continue;

// Collect common metadata from all loads in the group.
VPIRMetadata CommonMetadata = getCommonLoadMetadata(Group);

// Create an unpredicated version of the earliest load with common
// metadata.
// Only the address operand is passed and Mask is nullptr, so the new
// recipe carries no predicate.
auto *UnpredicatedLoad = new VPReplicateRecipe(
EarliestLoad->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
EarliestLoad->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
EarliestLoad->getUnderlyingInstr(), EarliestLoad->operands(),

/*IsSingleScalar=*/false, /*Mask=*/nullptr, /*Flags=*/{},
CommonMetadata);

// The new load takes the position of the earliest load in the group.
UnpredicatedLoad->insertBefore(EarliestLoad);

// Replace all loads in the group with the unpredicated load.
for (VPReplicateRecipe *Load : Group) {
Load->replaceAllUsesWith(UnpredicatedLoad);
Load->eraseFromParent();
}
}
}
}

void VPlanTransforms::materializeConstantVectorTripCount(
VPlan &Plan, ElementCount BestVF, unsigned BestUF,
PredicatedScalarEvolution &PSE) {
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,12 @@ struct VPlanTransforms {
/// plan using noalias metadata.
static void hoistInvariantLoads(VPlan &Plan);

/// Replace predicated loads from the same address that have complementary
/// masks (P and NOT P), and therefore execute on both paths, with a single
/// unpredicated load inserted before the earliest of them. A group is only
/// transformed if scoped noalias metadata shows that no memory write between
/// the earliest and the last load may alias with the loaded location.
/// \p SE and \p L are used to compute the SCEV of each load address.
static void hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
const Loop *L);

// Materialize vector trip counts for constants early if it can simply be
// computed as (Original TC / VF * UF) * VF * UF.
static void
Expand Down
Loading