Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8311,6 +8311,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
if (auto Plan = tryToBuildVPlanWithVPRecipes(
std::unique_ptr<VPlan>(VPlan0->duplicate()), SubRange, &LVer)) {
// Now optimize the initial VPlan.
VPlanTransforms::hoistPredicatedLoads(*Plan, *PSE.getSE(), OrigLoop);
VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
*Plan, CM.getMinimalBitwidths());
VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
Expand Down
173 changes: 173 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
#include "llvm/Support/TypeSize.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

#define DEBUG_TYPE "loop-vectorize"

using namespace llvm;
using namespace VPlanPatternMatch;

Expand Down Expand Up @@ -3968,6 +3970,177 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
}
}

/// Compute the metadata common to every load in \p Loads, i.e. the
/// intersection of their VPIRMetadata (noalias scopes, TBAA, etc.).
/// \p Loads must be non-empty.
static VPIRMetadata getCommonLoadMetadata(ArrayRef<VPReplicateRecipe *> Loads) {
  // Seed with the first load's metadata, then narrow it to what all of the
  // remaining loads share.
  VPIRMetadata Common = *Loads.front();
  for (VPReplicateRecipe *Other : Loads.drop_front())
    Common.intersect(*Other);
  return Common;
}

// Check if a load can be hoisted by verifying it doesn't alias with any stores
// in blocks between FirstBB and LastBB using scoped noalias metadata.
//
// Walks the (assumed linear) chain of blocks starting at FirstBB and, for each
// recipe that may write to memory, uses the load's scoped-noalias metadata to
// prove the write cannot alias the load. Returns false conservatively if the
// load carries no alias scope, if any write's location cannot be determined,
// or if the scoped-noalias check cannot rule out aliasing.
static bool canHoistLoadWithNoAliasCheck(VPReplicateRecipe *Load,
                                         VPBasicBlock *FirstBB,
                                         VPBasicBlock *LastBB) {
  // Get the load's memory location and check if it aliases with any stores
  // using scoped noalias metadata.
  auto LoadLoc = vputils::getMemoryLocation(*Load);
  // Without an alias scope on the load there is nothing to reason with; bail.
  if (!LoadLoc || !LoadLoc->AATags.Scope)
    return false;

  const AAMDNodes &LoadAA = LoadLoc->AATags;
  // NOTE(review): if the single-successor chain ends (getSingleSuccessor()
  // returns null) before LastBB is reached, the loop exits and we return
  // true — callers appear to guarantee LastBB is on the chain; confirm.
  for (VPBlockBase *Block = FirstBB; Block;
       Block = Block->getSingleSuccessor()) {
    // This function assumes a simple linear chain of blocks. If there are
    // multiple successors, we would need more complex analysis.
    assert(Block->getNumSuccessors() <= 1 &&
           "Expected at most one successor in block chain");
    auto *VPBB = cast<VPBasicBlock>(Block);
    for (VPRecipeBase &R : *VPBB) {
      if (R.mayWriteToMemory()) {
        auto Loc = vputils::getMemoryLocation(R);
        // Bail out if we can't get the location or if the scoped noalias
        // metadata indicates potential aliasing.
        if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes(
                        LoadAA.Scope, Loc->AATags.NoAlias))
          return false;
      }
    }

    // Stop once the last block of the range has been scanned.
    if (Block == LastBB)
      break;
  }
  return true;
}

/// Check if \p Addr accesses consecutive memory locations of type \p LoadTy.
/// True when the address is an affine add-recurrence in \p L whose step is
/// known positive and equals the store size of \p LoadTy.
static bool isConsecutiveLoad(VPValue *Addr, Type *LoadTy, ScalarEvolution &SE,
                              const DataLayout &DL, const Loop *L) {
  using namespace SCEVPatternMatch;
  // The address must evolve as {Start,+,Step}<L>; extract the step.
  const SCEV *Step;
  if (!match(vputils::getSCEVExprForVPValue(Addr, SE, L),
             m_scev_AffineAddRec(m_SCEV(), m_SCEV(Step), m_SpecificLoop(L))))
    return false;

  // Consecutive means the per-iteration step is exactly one element (store
  // size of the loaded type) and positive, i.e. not a reverse access.
  const SCEV *EltSize =
      SE.getSizeOfExpr(Step->getType(), DL.getTypeStoreSize(LoadTy));
  return Step == EltSize && SE.isKnownPositive(Step);
}

// Hoist predicated loads out of replicate regions when loads from the same
// address are executed under complementary masks (P and NOT P), so the load
// is unconditionally executed on every iteration. Consecutive loads are
// additionally widened into a single vector load.
void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
                                           const Loop *L) {
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  VPTypeAnalysis TypeInfo(Plan);
  VPDominatorTree VPDT(Plan);

  // Group predicated loads by their address SCEV.
  MapVector<const SCEV *, SmallVector<VPReplicateRecipe *>> LoadsByAddress;
  for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) {
    auto *VPBB = cast<VPBasicBlock>(Block);
    for (VPRecipeBase &R : *VPBB) {
      // Only replicated, predicated scalar loads are candidates.
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (!RepR || RepR->getOpcode() != Instruction::Load ||
          !RepR->isPredicated())
        continue;

      // Operand 0 of a load recipe is its address.
      VPValue *Addr = RepR->getOperand(0);
      const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L);
      // Skip addresses SCEV cannot analyze — they can't be keyed reliably.
      if (!isa<SCEVCouldNotCompute>(AddrSCEV))
        LoadsByAddress[AddrSCEV].push_back(RepR);
    }
  }

  // For each address, collect loads with complementary masks, sort by
  // dominance, and use the earliest load.
  for (auto &[Addr, Loads] : LoadsByAddress) {
    // Hoisting needs at least a complementary pair.
    if (Loads.size() < 2)
      continue;

    // Collect groups of loads with complementary masks.
    SmallVector<SmallVector<VPReplicateRecipe *, 4>> LoadGroups;
    // Entries are taken by reference and nulled out once consumed, so each
    // load lands in exactly one group.
    for (VPReplicateRecipe *&LoadI : Loads) {
      if (!LoadI)
        continue;

      VPValue *MaskI = LoadI->getMask();
      Type *TypeI = TypeInfo.inferScalarType(LoadI);
      SmallVector<VPReplicateRecipe *, 4> Group;
      Group.push_back(LoadI);
      LoadI = nullptr;

      // Find all loads with the same type.
      for (VPReplicateRecipe *&LoadJ : Loads) {
        if (!LoadJ)
          continue;

        Type *TypeJ = TypeInfo.inferScalarType(LoadJ);
        if (TypeI == TypeJ) {
          Group.push_back(LoadJ);
          LoadJ = nullptr;
        }
      }

      // Check if any load in the group has a complementary mask with another,
      // that is M1 == NOT(M2) or M2 == NOT(M1).
      // One complementary pair suffices: it proves the address is
      // unconditionally accessed, so every same-address/same-type load in the
      // group can reuse a single unconditional load.
      bool HasComplementaryMask =
          any_of(drop_begin(Group), [MaskI](VPReplicateRecipe *Load) {
            VPValue *MaskJ = Load->getMask();
            return match(MaskI, m_Not(m_Specific(MaskJ))) ||
                   match(MaskJ, m_Not(m_Specific(MaskI)));
          });

      if (HasComplementaryMask)
        LoadGroups.push_back(std::move(Group));
    }

    // For each group, check memory dependencies and hoist the earliest load.
    for (auto &Group : LoadGroups) {
      // Sort loads by dominance order, with earliest (most dominating) first.
      // NOTE(review): properlyDominates is only a strict weak ordering here if
      // the group's blocks form a dominance chain; the linear-chain assert in
      // canHoistLoadWithNoAliasCheck suggests that invariant — confirm.
      sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
        return VPDT.properlyDominates(A, B);
      });

      VPReplicateRecipe *EarliestLoad = Group.front();
      VPBasicBlock *FirstBB = EarliestLoad->getParent();
      VPBasicBlock *LastBB = Group.back()->getParent();

      // Check that the load doesn't alias with stores between first and last.
      if (!canHoistLoadWithNoAliasCheck(EarliestLoad, FirstBB, LastBB))
        continue;

      // Collect common metadata from all loads in the group.
      // Intersecting keeps only metadata valid for every replaced load.
      VPIRMetadata CommonMetadata = getCommonLoadMetadata(Group);

      Type *LoadTy = TypeInfo.inferScalarType(EarliestLoad);
      const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
      auto *LI = cast<LoadInst>(EarliestLoad->getUnderlyingInstr());
      VPValue *NewLoad;
      // Check if the load is consecutive to determine whether to widen it.
      if (isConsecutiveLoad(EarliestLoad->getOperand(0), LoadTy, SE, DL, L)) {
        // Consecutive: emit a single unmasked wide vector load.
        auto *WidenedLoad = new VPWidenLoadRecipe(
            *LI, EarliestLoad->getOperand(0), /*Mask=*/nullptr,
            /*Consecutive=*/true, /*Reverse=*/false, CommonMetadata,
            LI->getDebugLoc());
        NewLoad = WidenedLoad;
      } else {
        // Non-consecutive: keep a replicated load, but drop the predicate.
        auto *UnpredicatedLoad = new VPReplicateRecipe(
            LI, {EarliestLoad->getOperand(0)}, /*IsSingleScalar=*/false,
            /*Mask=*/nullptr, CommonMetadata);
        NewLoad = UnpredicatedLoad;
      }
      // Insert before the earliest load so the new def dominates all users.
      NewLoad->getDefiningRecipe()->insertBefore(EarliestLoad);

      // Replace all loads in the group with the new load.
      for (VPReplicateRecipe *Load : Group) {
        Load->replaceAllUsesWith(NewLoad);
        Load->eraseFromParent();
      }
    }
  }
}

void VPlanTransforms::materializeConstantVectorTripCount(
VPlan &Plan, ElementCount BestVF, unsigned BestUF,
PredicatedScalarEvolution &PSE) {
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,13 @@ struct VPlanTransforms {
/// plan using noalias metadata.
static void hoistInvariantLoads(VPlan &Plan);

/// Hoist predicated loads from the same address to the loop entry block, if
/// they are guaranteed to execute on both paths (i.e., in replicate regions
/// with complementary masks P and NOT P). Consecutive loads are widened into
/// vector loads.
static void hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
const Loop *L);

// Materialize vector trip counts for constants early if it can simply be
// computed as (Original TC / VF * UF) * VF * UF.
static void
Expand Down
Loading
Loading