2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8317,6 +8317,8 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
if (auto Plan = tryToBuildVPlanWithVPRecipes(
std::unique_ptr<VPlan>(VPlan0->duplicate()), SubRange, &LVer)) {
// Now optimize the initial VPlan.
VPlanTransforms::hoistPredicatedLoads(*Plan, *PSE.getSE(), OrigLoop);
VPlanTransforms::sinkPredicatedStores(*Plan, *PSE.getSE(), OrigLoop);
VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
*Plan, CM.getMinimalBitwidths());
VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
291 changes: 291 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -42,6 +42,8 @@
#include "llvm/Support/TypeSize.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

#define DEBUG_TYPE "loop-vectorize"

using namespace llvm;
using namespace VPlanPatternMatch;

@@ -3974,6 +3976,295 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
}
}

// Collect common metadata from a group of replicate recipes by intersecting
// metadata from all recipes in the group.
static VPIRMetadata getCommonMetadata(ArrayRef<VPReplicateRecipe *> Recipes) {
VPIRMetadata CommonMetadata = *Recipes.front();
for (VPReplicateRecipe *Recipe : drop_begin(Recipes))
CommonMetadata.intersect(*Recipe);
return CommonMetadata;
}
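// Illustrative sketch of the intersection (assumed semantics, not part of
// this patch): if one recipe in the group carries noalias scopes {A, B} and
// another carries {B, C}, the intersected metadata keeps only {B}. The
// merged recipe may only claim facts that held for every recipe it replaces.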

// Helper to check if we can prove no aliasing using scoped noalias metadata.
static bool canProveNoAlias(const AAMDNodes &AA1, const AAMDNodes &AA2) {
return AA1.Scope && AA2.NoAlias &&
!ScopedNoAliasAAResult::mayAliasInScopes(AA1.Scope, AA2.NoAlias);
}
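// Hedged illustration of the check above: AA1.Scope lists the scopes the
// first access is known to belong to, and AA2.NoAlias lists the scopes the
// second access is declared not to alias with. When the NoAlias list covers
// all of AA1's scopes, mayAliasInScopes returns false, so the two accesses
// are provably disjoint.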

// Check that a memory operation doesn't alias with the memory operations in
// blocks between FirstBB and LastBB, using scoped noalias metadata.
// For load hoisting, only writes need to be checked, and only in one
// direction. For store sinking, both reads and writes are checked, and
// no-aliasing may be proven in either direction.
static bool canHoistOrSinkWithNoAliasCheck(
const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB,
bool CheckReads,
const SmallPtrSetImpl<VPRecipeBase *> *ExcludeRecipes = nullptr) {
if (!MemLoc.AATags.Scope)
return false;

const AAMDNodes &MemAA = MemLoc.AATags;

for (VPBlockBase *Block = FirstBB; Block;
Block = Block->getSingleSuccessor()) {
assert(Block->getNumSuccessors() <= 1 &&
"Expected at most one successor in block chain");
auto *VPBB = cast<VPBasicBlock>(Block);
for (VPRecipeBase &R : *VPBB) {
if (ExcludeRecipes && ExcludeRecipes->contains(&R))
continue;

// Skip recipes that don't need checking.
if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory()))
continue;

auto Loc = vputils::getMemoryLocation(R);
if (!Loc)
// Conservatively assume aliasing for memory operations without
// location. We already filtered by
// mayWriteToMemory()/mayReadFromMemory() above.
return false;

// Check for aliasing using scoped noalias metadata.
// For store sinking with CheckReads, we can prove no aliasing
// bidirectionally (either direction suffices).
if (CheckReads) {
if (canProveNoAlias(Loc->AATags, MemAA) ||
canProveNoAlias(MemAA, Loc->AATags))
continue;
}

// Check if the memory operations may alias in the standard direction.
if (ScopedNoAliasAAResult::mayAliasInScopes(MemAA.Scope,
Loc->AATags.NoAlias))
return false;
}

if (Block == LastBB)
break;
}
return true;
}

template <unsigned Opcode>
static SmallVector<SmallVector<VPReplicateRecipe *, 4>>
collectComplementaryPredicatedMemOps(VPlan &Plan, ScalarEvolution &SE,
const Loop *L) {
static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
"Only Load and Store opcodes supported");
constexpr bool IsLoad = (Opcode == Instruction::Load);
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPTypeAnalysis TypeInfo(Plan);

// Group predicated operations by their address SCEV.
MapVector<const SCEV *, SmallVector<VPReplicateRecipe *>> RecipesByAddress;
for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) {
auto *VPBB = cast<VPBasicBlock>(Block);
for (VPRecipeBase &R : *VPBB) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated())
continue;

// For loads, operand 0 is address; for stores, operand 1 is address.
VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L);
if (!isa<SCEVCouldNotCompute>(AddrSCEV))
RecipesByAddress[AddrSCEV].push_back(RepR);
}
}

// For each address, collect operations with the same or complementary masks.
SmallVector<SmallVector<VPReplicateRecipe *, 4>> AllGroups;
for (auto &[Addr, Recipes] : RecipesByAddress) {
if (Recipes.size() < 2)
continue;

// Collect groups with the same or complementary masks.
for (VPReplicateRecipe *&RecipeI : Recipes) {
if (!RecipeI)
continue;

VPValue *MaskI = RecipeI->getMask();
Type *TypeI =
TypeInfo.inferScalarType(IsLoad ? RecipeI : RecipeI->getOperand(0));
SmallVector<VPReplicateRecipe *, 4> Group;
Group.push_back(RecipeI);
RecipeI = nullptr;

// Find all operations with the same or complementary masks.
bool HasComplementaryMask = false;
for (VPReplicateRecipe *&RecipeJ : Recipes) {
if (!RecipeJ)
continue;

VPValue *MaskJ = RecipeJ->getMask();
Type *TypeJ =
TypeInfo.inferScalarType(IsLoad ? RecipeJ : RecipeJ->getOperand(0));
if (TypeI == TypeJ) {
// Check if any operation in the group has a complementary mask with
// another, that is M1 == NOT(M2) or M2 == NOT(M1).
HasComplementaryMask |= match(MaskI, m_Not(m_Specific(MaskJ))) ||
match(MaskJ, m_Not(m_Specific(MaskI)));
Group.push_back(RecipeJ);
RecipeJ = nullptr;
}
}

if (HasComplementaryMask) {
assert(Group.size() >= 2 && "must have at least 2 entries");
AllGroups.push_back(std::move(Group));
}
}
}

return AllGroups;
}
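// Sketch of a grouped pair (assumed IR shape, for illustration only): two
// predicated recipes whose addresses have the same SCEV and whose masks are
// complementary, e.g.
//   %m    = icmp ult i64 %i, %bound
//   %notm = xor i1 %m, true
//   (load %p under mask %m) and (load %p under mask %notm)
// Together the two masks cover every lane, so the address is accessed
// unconditionally by the group as a whole.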

void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
const Loop *L) {
auto Groups =
collectComplementaryPredicatedMemOps<Instruction::Load>(Plan, SE, L);
if (Groups.empty())
return;

VPDominatorTree VPDT(Plan);

// Process each group of loads.
for (auto &Group : Groups) {
// Sort loads by dominance order, with earliest (most dominating) first.
sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
return VPDT.properlyDominates(A, B);
});

// Try to use the earliest (most dominating) load to replace all others.
VPReplicateRecipe *EarliestLoad = Group[0];
VPBasicBlock *FirstBB = EarliestLoad->getParent();
VPBasicBlock *LastBB = Group.back()->getParent();

// Check that the load doesn't alias with stores between first and last.
auto LoadLoc = vputils::getMemoryLocation(*EarliestLoad);
if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB,
/*CheckReads=*/false))
continue;

// Collect common metadata from all loads in the group.
VPIRMetadata CommonMetadata = getCommonMetadata(Group);

// Create an unpredicated version of the earliest load with common
// metadata.
auto *UnpredicatedLoad = new VPReplicateRecipe(
EarliestLoad->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
/*IsSingleScalar=*/false, /*Mask=*/nullptr, *EarliestLoad,
CommonMetadata);

UnpredicatedLoad->insertBefore(EarliestLoad);

// Replace all loads in the group with the unpredicated load.
for (VPReplicateRecipe *Load : Group) {
Load->replaceAllUsesWith(UnpredicatedLoad);
Load->eraseFromParent();
}
}
}
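// Source-level illustration (not part of this patch) of the pattern the
// transform targets:
//
//   if (c[i])
//     s += a[i];  // load of a[i] under mask P
//   else
//     s -= a[i];  // load of a[i] under mask NOT P
//
// P and NOT P together cover every iteration, so both predicated loads are
// replaced by one unpredicated load of a[i] at the earliest load's position,
// and its result feeds both former users.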

static bool canSinkStoreWithNoAliasCheck(
VPReplicateRecipe *Store, ArrayRef<VPReplicateRecipe *> StoresToSink,
const SmallPtrSetImpl<VPRecipeBase *> *AlreadySunkStores = nullptr) {
auto StoreLoc = vputils::getMemoryLocation(*Store);
if (!StoreLoc)
return false;

SmallPtrSet<VPRecipeBase *, 4> StoresToSinkSet(StoresToSink.begin(),
StoresToSink.end());
if (AlreadySunkStores)
StoresToSinkSet.insert(AlreadySunkStores->begin(),
AlreadySunkStores->end());

VPBasicBlock *FirstBB = StoresToSink.front()->getParent();
VPBasicBlock *LastBB = StoresToSink.back()->getParent();

if (StoreLoc->AATags.Scope)
return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB,
/*CheckReads=*/true,
&StoresToSinkSet);

// Without alias scope metadata, we conservatively require no memory
// operations between the stores being sunk.
for (VPBlockBase *Block = FirstBB; Block;
Block = Block->getSingleSuccessor()) {
auto *VPBB = cast<VPBasicBlock>(Block);
for (VPRecipeBase &R : *VPBB) {
if (StoresToSinkSet.contains(&R))
continue;

if (R.mayReadFromMemory() || R.mayWriteToMemory())
return false;
}

if (Block == LastBB)
break;
}

return true;
}

void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE,
const Loop *L) {
auto Groups =
collectComplementaryPredicatedMemOps<Instruction::Store>(Plan, SE, L);

if (Groups.empty())
return;

VPDominatorTree VPDT(Plan);

// Track stores from all groups that have been successfully sunk to exclude
// them from alias checks for subsequent groups.
SmallPtrSet<VPRecipeBase *, 16> AlreadySunkStores;

for (auto &Group : Groups) {
sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
return VPDT.properlyDominates(A, B);
});

if (!canSinkStoreWithNoAliasCheck(Group[0], Group, &AlreadySunkStores))
continue;

// Use the last (most dominated) store's location for the unconditional
// store.
VPReplicateRecipe *LastStore = Group.back();
VPBasicBlock *InsertBB = LastStore->getParent();

// Collect common alias metadata from all stores in the group.
VPIRMetadata CommonMetadata = getCommonMetadata(Group);

// Build select chain for stored values.
VPValue *SelectedValue = Group[0]->getOperand(0);
VPBuilder Builder(InsertBB, LastStore->getIterator());

for (unsigned I = 1; I < Group.size(); ++I) {
VPValue *Mask = Group[I]->getMask();
VPValue *Value = Group[I]->getOperand(0);
SelectedValue = Builder.createSelect(Mask, Value, SelectedValue,
Group[I]->getDebugLoc());
}

// Create unconditional store with selected value and common metadata.
VPValue *AddrVPValue = Group[0]->getOperand(1);
SmallVector<VPValue *> Operands = {SelectedValue, AddrVPValue};
auto *SI = cast<StoreInst>(Group[0]->getUnderlyingInstr());
auto *UnpredicatedStore =
new VPReplicateRecipe(SI, Operands, /*IsSingleScalar=*/false,
/*Mask=*/nullptr, *LastStore, CommonMetadata);
UnpredicatedStore->insertBefore(*InsertBB, LastStore->getIterator());

// Track and remove all predicated stores from the group.
for (VPReplicateRecipe *Store : Group) {
AlreadySunkStores.insert(Store);
Store->eraseFromParent();
}
}
}
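// Source-level illustration (not part of this patch):
//
//   if (c[i]) a[i] = x;  // store under mask P
//   else      a[i] = y;  // store under mask NOT P
//
// becomes a single unconditional store of a select:
//
//   a[i] = c[i] ? x : y;
//
// With more than two stores in a group, the selects are chained in dominance
// order, so values from later (more dominated) stores take precedence,
// preserving the original last-store-wins semantics.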

void VPlanTransforms::materializeConstantVectorTripCount(
VPlan &Plan, ElementCount BestVF, unsigned BestUF,
PredicatedScalarEvolution &PSE) {
13 changes: 13 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -314,6 +314,19 @@ struct VPlanTransforms {
/// plan using noalias metadata.
static void hoistInvariantLoads(VPlan &Plan);

/// Hoist predicated loads from the same address, replacing them with a
/// single unpredicated load at the position of the earliest one, if the
/// address is guaranteed to be accessed on all paths (i.e., the loads are
/// predicated with complementary masks P and NOT P).
static void hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
const Loop *L);

/// Sink predicated stores to the same address with complementary predicates
/// (P and NOT P), replacing them with a single unconditional store whose
/// value is chosen by select recipes over the stored values. This eliminates
/// the branching overhead, since together the paths always store to the
/// same location.
static void sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE,
const Loop *L);

// Materialize vector trip counts for constants early if it can simply be
// computed as (Original TC / VF * UF) * VF * UF.
static void