@@ -3958,6 +3958,151 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
39583958 }
39593959}
39603960
3961+ // Returns the intersection of metadata from a group of loads.
3962+ static VPIRMetadata getCommonLoadMetadata (ArrayRef<VPReplicateRecipe *> Loads) {
3963+ VPIRMetadata CommonMetadata = *Loads.front ();
3964+ for (VPReplicateRecipe *Load : drop_begin (Loads))
3965+ CommonMetadata.intersect (*Load);
3966+ return CommonMetadata;
3967+ }
3968+
// Check if a load can be hoisted by verifying it doesn't alias with any stores
// in blocks between FirstBB and LastBB using scoped noalias metadata.
//
// \p Load    the predicated load being considered for hoisting.
// \p FirstBB the block containing the earliest (most dominating) load.
// \p LastBB  the block containing the latest load of the group.
//
// The walk follows the single-successor chain starting at FirstBB and stops
// once LastBB has been scanned. NOTE(review): this assumes LastBB is reachable
// from FirstBB via that chain — if the chain ends early (a block with no
// single successor), the loop exits and we return true, which is NOT a
// conservative answer; confirm callers guarantee a linear chain.
static bool canHoistLoadWithNoAliasCheck(VPReplicateRecipe *Load,
                                         VPBasicBlock *FirstBB,
                                         VPBasicBlock *LastBB) {
  // Get the load's memory location and check if it aliases with any stores
  // using scoped noalias metadata. Without !alias.scope metadata on the load
  // we cannot disambiguate, so bail out.
  auto LoadLoc = vputils::getMemoryLocation(*Load);
  if (!LoadLoc || !LoadLoc->AATags.Scope)
    return false;

  const AAMDNodes &LoadAA = LoadLoc->AATags;
  for (VPBlockBase *Block = FirstBB; Block;
       Block = Block->getSingleSuccessor()) {
    // This function assumes a simple linear chain of blocks. If there are
    // multiple successors, we would need more complex analysis.
    assert(Block->getNumSuccessors() <= 1 &&
           "Expected at most one successor in block chain");
    auto *VPBB = cast<VPBasicBlock>(Block);
    for (VPRecipeBase &R : *VPBB) {
      // Only writes can invalidate the hoisted load's value.
      if (R.mayWriteToMemory()) {
        auto Loc = vputils::getMemoryLocation(R);
        // Bail out if we can't get the location or if the scoped noalias
        // metadata indicates potential aliasing.
        if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes(
                        LoadAA.Scope, Loc->AATags.NoAlias))
          return false;
      }
    }

    // LastBB's recipes have been scanned; nothing past it matters.
    if (Block == LastBB)
      break;
  }
  return true;
}
4004+
// Hoist predicated replicating loads with complementary masks: when two (or
// more) predicated loads read the same address (identical SCEV) and at least
// one pair of masks satisfies M1 == NOT(M2), the location is read on every
// path, so a single unpredicated load can replace the whole group — provided
// no intervening store may alias it (checked via scoped noalias metadata).
void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
                                           const Loop *L) {
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  VPTypeAnalysis TypeInfo(Plan);
  VPDominatorTree VPDT(Plan);

  // Group predicated loads by their address SCEV.
  MapVector<const SCEV *, SmallVector<VPReplicateRecipe *>> LoadsByAddress;
  for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) {
    auto *VPBB = cast<VPBasicBlock>(Block);
    for (VPRecipeBase &R : *VPBB) {
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (!RepR || RepR->getOpcode() != Instruction::Load ||
          !RepR->isPredicated())
        continue;

      // Operand 0 of a replicated load is its address.
      VPValue *Addr = RepR->getOperand(0);
      const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L);
      // Loads whose address SCEV can't be computed can never be proven to
      // share an address, so they are simply not grouped.
      if (!isa<SCEVCouldNotCompute>(AddrSCEV))
        LoadsByAddress[AddrSCEV].push_back(RepR);
    }
  }

  // For each address, collect loads with complementary masks, sort by
  // dominance, and use the earliest load.
  for (auto &[Addr, Loads] : LoadsByAddress) {
    if (Loads.size() < 2)
      continue;

    // Collect groups of loads with complementary masks. Entries in Loads are
    // nulled out once consumed into a group, so each load joins at most one
    // group.
    SmallVector<SmallVector<VPReplicateRecipe *, 4>> LoadGroups;
    for (VPReplicateRecipe *&LoadI : Loads) {
      if (!LoadI)
        continue;

      VPValue *MaskI = LoadI->getMask();
      Type *TypeI = TypeInfo.inferScalarType(LoadI);
      SmallVector<VPReplicateRecipe *, 4> Group;
      Group.push_back(LoadI);
      LoadI = nullptr;

      // Find all loads with the same type.
      for (VPReplicateRecipe *&LoadJ : Loads) {
        if (!LoadJ)
          continue;

        Type *TypeJ = TypeInfo.inferScalarType(LoadJ);
        if (TypeI == TypeJ) {
          Group.push_back(LoadJ);
          LoadJ = nullptr;
        }
      }

      // Check if any load in the group has a complementary mask with another,
      // that is M1 == NOT(M2) or M2 == NOT(M1). A complementary pair proves
      // the address is read unconditionally, so hoisting does not introduce
      // a new trap.
      bool HasComplementaryMask =
          any_of(drop_begin(Group), [MaskI](VPReplicateRecipe *Load) {
            VPValue *MaskJ = Load->getMask();
            return match(MaskI, m_Not(m_Specific(MaskJ))) ||
                   match(MaskJ, m_Not(m_Specific(MaskI)));
          });

      if (HasComplementaryMask)
        LoadGroups.push_back(std::move(Group));
    }

    // For each group, check memory dependencies and hoist the earliest load.
    for (auto &Group : LoadGroups) {
      // Sort loads by dominance order, with earliest (most dominating) first.
      sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
        return VPDT.properlyDominates(A, B);
      });

      VPReplicateRecipe *EarliestLoad = Group.front();
      VPBasicBlock *FirstBB = EarliestLoad->getParent();
      VPBasicBlock *LastBB = Group.back()->getParent();

      // Check that the load doesn't alias with stores between first and last.
      if (!canHoistLoadWithNoAliasCheck(EarliestLoad, FirstBB, LastBB))
        continue;

      // Collect common metadata from all loads in the group, so the merged
      // load only keeps metadata valid for every replaced load.
      VPIRMetadata CommonMetadata = getCommonLoadMetadata(Group);

      // Create an unpredicated version of the earliest load with common
      // metadata.
      auto *UnpredicatedLoad = new VPReplicateRecipe(
          EarliestLoad->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
          /*IsSingleScalar=*/false, /*Mask=*/nullptr, CommonMetadata);

      UnpredicatedLoad->insertBefore(EarliestLoad);

      // Replace all loads in the group with the unpredicated load.
      for (VPReplicateRecipe *Load : Group) {
        Load->replaceAllUsesWith(UnpredicatedLoad);
        Load->eraseFromParent();
      }
    }
  }
}
4105+
39614106void VPlanTransforms::materializeConstantVectorTripCount (
39624107 VPlan &Plan, ElementCount BestVF, unsigned BestUF,
39634108 PredicatedScalarEvolution &PSE) {
0 commit comments