diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 356d759b94799..10af442b05d62 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8311,6 +8311,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, if (auto Plan = tryToBuildVPlanWithVPRecipes( std::unique_ptr(VPlan0->duplicate()), SubRange, &LVer)) { // Now optimize the initial VPlan. + VPlanTransforms::hoistPredicatedLoads(*Plan, *PSE.getSE(), OrigLoop); VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths, *Plan, CM.getMinimalBitwidths()); VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 26563242de283..4f9c100b249a7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -42,6 +42,8 @@ #include "llvm/Support/TypeSize.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" +#define DEBUG_TYPE "loop-vectorize" + using namespace llvm; using namespace VPlanPatternMatch; @@ -3968,6 +3970,177 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) { } } +// Returns the intersection of metadata from a group of loads. +static VPIRMetadata getCommonLoadMetadata(ArrayRef Loads) { + VPIRMetadata CommonMetadata = *Loads.front(); + for (VPReplicateRecipe *Load : drop_begin(Loads)) + CommonMetadata.intersect(*Load); + return CommonMetadata; +} + +// Check if a load can be hoisted by verifying it doesn't alias with any stores +// in blocks between FirstBB and LastBB using scoped noalias metadata. +static bool canHoistLoadWithNoAliasCheck(VPReplicateRecipe *Load, + VPBasicBlock *FirstBB, + VPBasicBlock *LastBB) { + // Get the load's memory location and check if it aliases with any stores + // using scoped noalias metadata. + auto LoadLoc = vputils::getMemoryLocation(*Load); + if (!LoadLoc || !LoadLoc->AATags.Scope) + return false; + + const AAMDNodes &LoadAA = LoadLoc->AATags; + for (VPBlockBase *Block = FirstBB; Block; + Block = Block->getSingleSuccessor()) { + // This function assumes a simple linear chain of blocks. If there are + // multiple successors, we would need more complex analysis. + assert(Block->getNumSuccessors() <= 1 && + "Expected at most one successor in block chain"); + auto *VPBB = cast(Block); + for (VPRecipeBase &R : *VPBB) { + if (R.mayWriteToMemory()) { + auto Loc = vputils::getMemoryLocation(R); + // Bail out if we can't get the location or if the scoped noalias + // metadata indicates potential aliasing. + if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes( + LoadAA.Scope, Loc->AATags.NoAlias)) + return false; + } + } + + if (Block == LastBB) + break; + } + return true; +} + +/// Check if \p Addr accesses consecutive memory locations of type \p LoadTy. +static bool isConsecutiveLoad(VPValue *Addr, Type *LoadTy, ScalarEvolution &SE, + const DataLayout &DL, const Loop *L) { + using namespace SCEVPatternMatch; + const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L); + const SCEV *StepSCEV; + if (!match(AddrSCEV, m_scev_AffineAddRec(m_SCEV(), m_SCEV(StepSCEV), + m_SpecificLoop(L)))) + return false; + + TypeSize TS = DL.getTypeStoreSize(LoadTy); + const SCEV *ElementSizeSCEV = SE.getSizeOfExpr(StepSCEV->getType(), TS); + return SE.isKnownPositive(StepSCEV) && StepSCEV == ElementSizeSCEV; +} + +void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, + const Loop *L) { + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + VPTypeAnalysis TypeInfo(Plan); + VPDominatorTree VPDT(Plan); + + // Group predicated loads by their address SCEV. + MapVector> LoadsByAddress; + for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) { + auto *VPBB = cast(Block); + for (VPRecipeBase &R : *VPBB) { + auto *RepR = dyn_cast(&R); + if (!RepR || RepR->getOpcode() != Instruction::Load || + !RepR->isPredicated()) + continue; + + VPValue *Addr = RepR->getOperand(0); + const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L); + if (!isa(AddrSCEV)) + LoadsByAddress[AddrSCEV].push_back(RepR); + } + } + + // For each address, collect loads with complementary masks, sort by + // dominance, and use the earliest load. + for (auto &[Addr, Loads] : LoadsByAddress) { + if (Loads.size() < 2) + continue; + + // Collect groups of loads with complementary masks. + SmallVector> LoadGroups; + for (VPReplicateRecipe *&LoadI : Loads) { + if (!LoadI) + continue; + + VPValue *MaskI = LoadI->getMask(); + Type *TypeI = TypeInfo.inferScalarType(LoadI); + SmallVector Group; + Group.push_back(LoadI); + LoadI = nullptr; + + // Find all loads with the same type. + for (VPReplicateRecipe *&LoadJ : Loads) { + if (!LoadJ) + continue; + + Type *TypeJ = TypeInfo.inferScalarType(LoadJ); + if (TypeI == TypeJ) { + Group.push_back(LoadJ); + LoadJ = nullptr; + } + } + + // Check if any load in the group has a complementary mask with another, + // that is M1 == NOT(M2) or M2 == NOT(M1). + bool HasComplementaryMask = + any_of(drop_begin(Group), [MaskI](VPReplicateRecipe *Load) { + VPValue *MaskJ = Load->getMask(); + return match(MaskI, m_Not(m_Specific(MaskJ))) || + match(MaskJ, m_Not(m_Specific(MaskI))); + }); + + if (HasComplementaryMask) + LoadGroups.push_back(std::move(Group)); + } + + // For each group, check memory dependencies and hoist the earliest load. + for (auto &Group : LoadGroups) { + // Sort loads by dominance order, with earliest (most dominating) first. + sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) { + return VPDT.properlyDominates(A, B); + }); + + VPReplicateRecipe *EarliestLoad = Group.front(); + VPBasicBlock *FirstBB = EarliestLoad->getParent(); + VPBasicBlock *LastBB = Group.back()->getParent(); + + // Check that the load doesn't alias with stores between first and last. + if (!canHoistLoadWithNoAliasCheck(EarliestLoad, FirstBB, LastBB)) + continue; + + // Collect common metadata from all loads in the group. + VPIRMetadata CommonMetadata = getCommonLoadMetadata(Group); + + Type *LoadTy = TypeInfo.inferScalarType(EarliestLoad); + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + auto *LI = cast(EarliestLoad->getUnderlyingInstr()); + VPValue *NewLoad; + // Check if the load is consecutive to determine whether to widen it. + if (isConsecutiveLoad(EarliestLoad->getOperand(0), LoadTy, SE, DL, L)) { + auto *WidenedLoad = new VPWidenLoadRecipe( + *LI, EarliestLoad->getOperand(0), /*Mask=*/nullptr, + /*Consecutive=*/true, /*Reverse=*/false, CommonMetadata, + LI->getDebugLoc()); + NewLoad = WidenedLoad; + } else { + auto *UnpredicatedLoad = new VPReplicateRecipe( + LI, {EarliestLoad->getOperand(0)}, /*IsSingleScalar=*/false, + /*Mask=*/nullptr, CommonMetadata); + NewLoad = UnpredicatedLoad; + } + NewLoad->getDefiningRecipe()->insertBefore(EarliestLoad); + + // Replace all loads in the group with the new load. + for (VPReplicateRecipe *Load : Group) { + Load->replaceAllUsesWith(NewLoad); + Load->eraseFromParent(); + } + } + } +} + void VPlanTransforms::materializeConstantVectorTripCount( VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 708ea4185e1cb..bf5015d58de59 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -314,6 +314,13 @@ struct VPlanTransforms { /// plan using noalias metadata. static void hoistInvariantLoads(VPlan &Plan); + /// Hoist predicated loads from the same address to the loop entry block, if + /// they are guaranteed to execute on both paths (i.e., in replicate regions + /// with complementary masks P and NOT P). Consecutive loads are widened into + /// vector loads. + static void hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, + const Loop *L); + // Materialize vector trip counts for constants early if it can simply be // computed as (Original TC / VF * UF) * VF * UF. static void diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll index d447a39aafd93..5edd76de5f680 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll @@ -28,32 +28,16 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE17:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE12:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META3:![0-9]+]] -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP12]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] -; CHECK: [[PRED_LOAD_IF6]]: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META3]] -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP16]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] -; CHECK: [[PRED_LOAD_CONTINUE7]]: -; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], %[[PRED_LOAD_IF6]] ] -; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP18]], splat (i32 5) +; CHECK-NEXT: [[TMP17:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4, !alias.scope [[META3:![0-9]+]] +; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP17]], splat (i32 5) ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: @@ -63,48 +47,30 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] -; CHECK: [[PRED_STORE_IF8]]: +; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] +; CHECK: [[PRED_STORE_IF7]]: ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1 ; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !alias.scope [[META5]], !noalias [[META7]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] -; CHECK: [[PRED_STORE_CONTINUE9]]: -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11:.*]] -; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META3]] -; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> poison, i32 [[TMP28]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_CONTINUE11]]: -; CHECK-NEXT: [[TMP30:%.*]] = phi <2 x i32> [ poison, %[[PRED_STORE_CONTINUE9]] ], [ [[TMP29]], %[[PRED_LOAD_IF10]] ] -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]] -; CHECK: [[PRED_LOAD_IF12]]: -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4, !alias.scope [[META3]] -; CHECK-NEXT: [[TMP34:%.*]] = insertelement <2 x i32> [[TMP30]], i32 [[TMP33]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE13]] -; CHECK: [[PRED_LOAD_CONTINUE13]]: -; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ [[TMP30]], %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP34]], %[[PRED_LOAD_IF12]] ] -; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP35]], splat (i32 10) +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]] +; CHECK: [[PRED_STORE_CONTINUE8]]: +; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP17]], splat (i32 10) ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] -; CHECK: [[PRED_STORE_IF14]]: +; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] +; CHECK: [[PRED_STORE_IF9]]: ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP36]], i32 0 ; CHECK-NEXT: store i32 [[TMP39]], ptr [[TMP38]], align 4, !alias.scope [[META5]], !noalias [[META7]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE15]] -; CHECK: [[PRED_STORE_CONTINUE15]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE10]] +; CHECK: [[PRED_STORE_CONTINUE10]]: ; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17]] -; CHECK: [[PRED_STORE_IF16]]: +; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12]] +; CHECK: [[PRED_STORE_IF11]]: ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[TMP36]], i32 1 ; CHECK-NEXT: store i32 [[TMP42]], ptr [[TMP41]], align 4, !alias.scope [[META5]], !noalias [[META7]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE17]] -; CHECK: [[PRED_STORE_CONTINUE17]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE12]] +; CHECK: [[PRED_STORE_CONTINUE12]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -342,7 +308,7 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE29:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] @@ -350,79 +316,55 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP4]] ; CHECK-NEXT: store i32 10, ptr [[TMP10]], align 4, !alias.scope [[META25:![0-9]+]], !noalias [[META27:![0-9]+]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !alias.scope [[META30:![0-9]+]] -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP12]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP13]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] +; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]] -; CHECK: [[PRED_LOAD_IF19]]: +; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]] +; CHECK: [[PRED_STORE_IF19]]: ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP5]] ; CHECK-NEXT: store i32 10, ptr [[TMP16]], align 4, !alias.scope [[META25]], !noalias [[META27]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META30]] -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE20]] -; CHECK: [[PRED_LOAD_CONTINUE20]]: -; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ [[TMP14]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP19]], %[[PRED_LOAD_IF19]] ] -; CHECK-NEXT: [[TMP21:%.*]] = sub <2 x i32> [[TMP20]], splat (i32 5) +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]] +; CHECK: [[PRED_STORE_CONTINUE20]]: +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP19:%.*]] = load <2 x i32>, ptr [[TMP13]], align 4, !alias.scope [[META30:![0-9]+]] +; CHECK-NEXT: [[TMP21:%.*]] = sub <2 x i32> [[TMP19]], splat (i32 5) ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]] +; CHECK: [[PRED_STORE_IF22]]: ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0 ; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META32:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE23]] +; CHECK: [[PRED_STORE_CONTINUE23]]: ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] -; CHECK: [[PRED_STORE_IF21]]: +; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]] +; CHECK: [[PRED_STORE_IF24]]: ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP21]], i32 1 ; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !alias.scope [[META31]], !noalias [[META32]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] -; CHECK: [[PRED_STORE_CONTINUE22]]: -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]] -; CHECK: [[PRED_LOAD_IF23]]: -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4, !alias.scope [[META30]] -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> poison, i32 [[TMP30]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE24]] -; CHECK: [[PRED_LOAD_CONTINUE24]]: -; CHECK-NEXT: [[TMP32:%.*]] = phi <2 x i32> [ poison, %[[PRED_STORE_CONTINUE22]] ], [ [[TMP31]], %[[PRED_LOAD_IF23]] ] -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP33]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]] -; CHECK: [[PRED_LOAD_IF25]]: -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4, !alias.scope [[META30]] -; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i32> [[TMP32]], i32 [[TMP35]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE26]] -; CHECK: [[PRED_LOAD_CONTINUE26]]: -; CHECK-NEXT: [[TMP37:%.*]] = phi <2 x i32> [ [[TMP32]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP36]], %[[PRED_LOAD_IF25]] ] -; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP37]], splat (i32 10) +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE25]] +; CHECK: [[PRED_STORE_CONTINUE25]]: +; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) ; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]] -; CHECK: [[PRED_STORE_IF27]]: +; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27:.*]] +; CHECK: [[PRED_STORE_IF26]]: ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x i32> [[TMP38]], i32 0 ; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP40]], align 4, !alias.scope [[META31]], !noalias [[META32]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]] -; CHECK: [[PRED_STORE_CONTINUE28]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE27]] +; CHECK: [[PRED_STORE_CONTINUE27]]: ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30]] -; CHECK: [[PRED_STORE_IF29]]: +; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF28:.*]], label %[[PRED_STORE_CONTINUE29]] +; CHECK: [[PRED_STORE_IF28]]: ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[TMP38]], i32 1 ; CHECK-NEXT: store i32 [[TMP44]], ptr [[TMP43]], align 4, !alias.scope [[META31]], !noalias [[META32]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE30]] -; CHECK: [[PRED_STORE_CONTINUE30]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE29]] +; CHECK: [[PRED_STORE_CONTINUE29]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP45]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] @@ -648,7 +590,7 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr % ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE26:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE29:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] @@ -656,62 +598,54 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr % ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP4]] ; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4, !alias.scope [[META48:![0-9]+]], !noalias [[META50:![0-9]+]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !alias.scope [[META53:![0-9]+]] -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP12]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP13]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] +; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]] -; CHECK: [[PRED_LOAD_IF19]]: +; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]] +; CHECK: [[PRED_STORE_IF19]]: ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP5]] ; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4, !alias.scope [[META48]], !noalias [[META50]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META53]] -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE20]] -; CHECK: [[PRED_LOAD_CONTINUE20]]: -; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ [[TMP14]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP19]], %[[PRED_LOAD_IF19]] ] -; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP20]], splat (i32 10) +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]] +; CHECK: [[PRED_STORE_CONTINUE20]]: +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP19:%.*]] = load <2 x i32>, ptr [[TMP13]], align 4, !alias.scope [[META53:![0-9]+]] +; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]] +; CHECK: [[PRED_STORE_IF22]]: ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0 ; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META54:![0-9]+]], !noalias [[META55:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE23]] +; CHECK: [[PRED_STORE_CONTINUE23]]: ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] -; CHECK: [[PRED_STORE_IF21]]: +; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]] +; CHECK: [[PRED_STORE_IF24]]: ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP21]], i32 1 ; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !alias.scope [[META54]], !noalias [[META55]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] -; CHECK: [[PRED_STORE_CONTINUE22]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE25]] +; CHECK: [[PRED_STORE_CONTINUE25]]: ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] -; CHECK: [[PRED_STORE_IF23]]: -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4, !alias.scope [[META53]] +; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27:.*]] +; CHECK: [[PRED_STORE_IF26]]: ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store i32 [[TMP30]], ptr [[TMP31]], align 4, !alias.scope [[META54]], !noalias [[META55]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]] -; CHECK: [[PRED_STORE_CONTINUE24]]: +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 +; CHECK-NEXT: store i32 [[TMP29]], ptr [[TMP31]], align 4, !alias.scope [[META54]], !noalias [[META55]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE27]] +; CHECK: [[PRED_STORE_CONTINUE27]]: ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26]] -; CHECK: [[PRED_STORE_IF25]]: -; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4, !alias.scope [[META53]] +; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF28:.*]], label %[[PRED_STORE_CONTINUE29]] +; CHECK: [[PRED_STORE_IF28]]: ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: store i32 [[TMP34]], ptr [[TMP35]], align 4, !alias.scope [[META54]], !noalias [[META55]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]] -; CHECK: [[PRED_STORE_CONTINUE26]]: +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1 +; CHECK-NEXT: store i32 [[TMP30]], ptr [[TMP35]], align 4, !alias.scope [[META54]], !noalias [[META55]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE29]] +; CHECK: [[PRED_STORE_CONTINUE29]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP56:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll index b30d010aaf9c9..5c7c1bc8b1cd9 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll @@ -28,54 +28,17 @@ define void @test(ptr %dst, ptr %src, ptr %cond, i32 %n) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x ptr> [[TMP10]], ptr [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META0:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP5]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP34:%.*]] = xor <2 x i1> [[TMP15]], splat (i1 true) -; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i1> [[TMP34]], i32 0 -; CHECK-NEXT: br i1 [[TMP35]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META3:![0-9]+]] -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP19]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[TMP34]], i32 1 -; CHECK-NEXT: br i1 [[TMP21]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] -; CHECK: [[PRED_LOAD_IF6]]: -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META3]] -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> [[TMP20]], i32 [[TMP22]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] -; CHECK: [[PRED_LOAD_CONTINUE7]]: -; CHECK-NEXT: [[TMP36:%.*]] = phi <2 x i32> [ [[TMP20]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP23]], %[[PRED_LOAD_IF6]] ] +; CHECK-NEXT: [[TMP36:%.*]] = load <2 x i32>, ptr [[TMP9]], align 4, !alias.scope [[META3:![0-9]+]] ; CHECK-NEXT: [[TMP25:%.*]] = add <2 x i32> [[TMP36]], splat (i32 10) -; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 -; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] -; CHECK: [[PRED_LOAD_IF8]]: -; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META3]] -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> poison, i32 [[TMP26]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]] -; CHECK: [[PRED_LOAD_CONTINUE9]]: -; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE7]] ], [ [[TMP31]], %[[PRED_LOAD_IF8]] ] -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 -; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META3]] -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> [[TMP33]], i32 [[TMP27]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_CONTINUE11]]: -; CHECK-NEXT: [[TMP29:%.*]] = phi <2 x i32> [ [[TMP33]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP28]], %[[PRED_LOAD_IF10]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP29]], <2 x i32> [[TMP25]] -; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP37]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP36]], <2 x i32> [[TMP25]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP5]], 2 ; CHECK-NEXT: [[TMP60:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP60]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -506,54 +469,17 @@ define void @different_alignments_same_address(ptr %dst, ptr %src, ptr %cond, i3 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x ptr> [[TMP10]], ptr [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP20]], align 4, !alias.scope [[META36:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP5]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META36:![0-9]+]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i1> [[TMP15]], splat (i1 true) -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP16]], i32 0 -; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META39:![0-9]+]] -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP19]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[TMP16]], i32 1 -; CHECK-NEXT: br i1 [[TMP21]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] -; CHECK: [[PRED_LOAD_IF6]]: -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META39]] -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> [[TMP35]], i32 [[TMP22]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] -; CHECK: [[PRED_LOAD_CONTINUE7]]: -; CHECK-NEXT: [[TMP24:%.*]] = phi <2 x i32> [ [[TMP35]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP23]], %[[PRED_LOAD_IF6]] ] +; CHECK-NEXT: [[TMP24:%.*]] = load <2 x i32>, ptr [[TMP9]], align 4, !alias.scope [[META39:![0-9]+]] ; CHECK-NEXT: [[TMP25:%.*]] = add <2 x i32> [[TMP24]], splat (i32 10) -; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 -; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] -; CHECK: [[PRED_LOAD_IF8]]: -; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP8]], align 2, !alias.scope [[META39]] -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> poison, i32 [[TMP26]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]] -; CHECK: [[PRED_LOAD_CONTINUE9]]: -; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE7]] ], [ [[TMP31]], %[[PRED_LOAD_IF8]] ] -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 -; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP9]], align 2, !alias.scope [[META39]] -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> [[TMP33]], i32 [[TMP27]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_CONTINUE11]]: -; CHECK-NEXT: [[TMP29:%.*]] = phi <2 x i32> [ [[TMP33]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP28]], %[[PRED_LOAD_IF10]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP29]], <2 x i32> [[TMP25]] -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP34]], align 4, !alias.scope [[META41:![0-9]+]], !noalias [[META43:![0-9]+]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP24]], <2 x i32> [[TMP25]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope [[META41:![0-9]+]], !noalias [[META43:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP5]], 2 ; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -685,54 +611,17 @@ define void @duplicate_gep(ptr %dst, ptr %src, ptr %cond, i32 %n) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META46:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META49:![0-9]+]] -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP12]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] -; CHECK: [[PRED_LOAD_IF6]]: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META49]] -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP16]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] -; CHECK: [[PRED_LOAD_CONTINUE7]]: -; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], %[[PRED_LOAD_IF6]] ] -; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[TMP18]], splat (i32 10) -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] -; CHECK: [[PRED_LOAD_IF8]]: -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4, !alias.scope [[META49]] -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[TMP22]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]] -; CHECK: [[PRED_LOAD_CONTINUE9]]: -; CHECK-NEXT: [[TMP24:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE7]] ], [ [[TMP23]], %[[PRED_LOAD_IF8]] ] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !alias.scope [[META49]] -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> [[TMP24]], i32 [[TMP27]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_CONTINUE11]]: -; CHECK-NEXT: [[TMP29:%.*]] = phi <2 x i32> [ [[TMP24]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP28]], %[[PRED_LOAD_IF10]] ] +; CHECK-NEXT: [[TMP29:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4, !alias.scope [[META49:![0-9]+]] +; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[TMP29]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP29]], <2 x i32> [[TMP19]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4, !alias.scope [[META51:![0-9]+]], !noalias [[META53:![0-9]+]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP4]], 2 ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -805,50 +694,19 @@ define void @non_unit_stride_i64(ptr %dst, ptr %src, ptr %cond, i32 %n) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP6]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META56:![0-9]+]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true) -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 -; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META59:![0-9]+]] -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP13]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP14]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 -; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] -; CHECK: [[PRED_LOAD_IF6]]: ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP7]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META59]] -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP18]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] -; CHECK: [[PRED_LOAD_CONTINUE7]]: -; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ [[TMP15]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP19]], %[[PRED_LOAD_IF6]] ] -; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP20]], splat (i32 10) -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 -; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] -; CHECK: [[PRED_LOAD_IF8]]: -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4, !alias.scope [[META59]] +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META59:![0-9]+]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META59]] ; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> poison, i32 [[TMP24]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]] -; CHECK: [[PRED_LOAD_CONTINUE9]]: -; CHECK-NEXT: [[TMP26:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE7]] ], [ [[TMP25]], %[[PRED_LOAD_IF8]] ] -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1 -; CHECK-NEXT: br i1 [[TMP27]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP7]] -; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4, !alias.scope [[META59]] -; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x i32> [[TMP26]], i32 [[TMP29]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_CONTINUE11]]: -; CHECK-NEXT: [[TMP31:%.*]] = phi <2 x i32> [ [[TMP26]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP30]], %[[PRED_LOAD_IF10]] ] +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> [[TMP25]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP31]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[TMP31]], <2 x i32> [[TMP21]] ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP6]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP32]], align 4, !alias.scope [[META61:![0-9]+]], !noalias [[META63:![0-9]+]] @@ -1105,55 +963,15 @@ define void @hoist_predicated_load_with_chained_geps1(ptr %dst, ptr %src, i1 %co ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[COND]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE8:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 -; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 2, !alias.scope [[META70:![0-9]+]] -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i16> poison, i16 [[TMP6]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP7]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] -; CHECK: [[PRED_LOAD_IF3]]: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i64 8 -; CHECK-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP11]], align 2, !alias.scope [[META70]] -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP12]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]] -; CHECK: [[PRED_LOAD_CONTINUE4]]: -; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP8]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], %[[PRED_LOAD_IF3]] ] -; CHECK-NEXT: br i1 [[COND]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] -; CHECK: [[PRED_LOAD_IF5]]: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i64 8 -; CHECK-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP16]], align 2, !alias.scope [[META70]] -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i16> poison, i16 [[TMP17]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] -; CHECK: [[PRED_LOAD_CONTINUE6]]: -; CHECK-NEXT: [[TMP19:%.*]] = phi <2 x i16> [ poison, %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP18]], %[[PRED_LOAD_IF5]] ] -; CHECK-NEXT: br i1 [[COND]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8]] -; CHECK: [[PRED_LOAD_IF7]]: ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i64 8 -; CHECK-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META70]] -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i16> [[TMP19]], i16 [[TMP22]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE8]] -; CHECK: [[PRED_LOAD_CONTINUE8]]: -; CHECK-NEXT: [[TMP24:%.*]] = phi <2 x i16> [ [[TMP19]], %[[PRED_LOAD_CONTINUE6]] ], [ [[TMP23]], %[[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[COND]], <2 x i16> [[TMP24]], <2 x i16> [[TMP14]] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1 -; CHECK-NEXT: store i16 [[TMP25]], ptr [[DST]], align 2, !alias.scope [[META73:![0-9]+]], !noalias [[META70]] +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META70:![0-9]+]] +; CHECK-NEXT: store i16 [[TMP4]], ptr [[DST]], align 2, !alias.scope [[META73:![0-9]+]], !noalias [[META70]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP75:![0-9]+]] @@ -1205,55 +1023,15 @@ define void @hoist_predicated_load_with_chained_geps2(ptr %dst, ptr %src, i1 %co ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[COND]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE8:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> [[TMP5]], ptr [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 -; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8 -; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP8]], align 2, !alias.scope [[META77:![0-9]+]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i16> poison, i16 [[TMP9]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP10]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 -; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] -; CHECK: [[PRED_LOAD_IF3]]: -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP4]], i64 8 -; CHECK-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP13]], align 2, !alias.scope [[META77]] -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i16> [[TMP11]], i16 [[TMP14]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]] -; CHECK: [[PRED_LOAD_CONTINUE4]]: -; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i16> [ [[TMP11]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP15]], %[[PRED_LOAD_IF3]] ] -; CHECK-NEXT: br i1 [[COND]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] -; CHECK: [[PRED_LOAD_IF5]]: -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8 -; CHECK-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP17]], align 2, !alias.scope [[META77]] -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i16> poison, i16 [[TMP18]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] -; CHECK: [[PRED_LOAD_CONTINUE6]]: -; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i16> [ poison, %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP19]], %[[PRED_LOAD_IF5]] ] -; CHECK-NEXT: br i1 [[COND]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8]] -; CHECK: [[PRED_LOAD_IF7]]: ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP4]], i64 8 -; CHECK-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META77]] -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i16> [[TMP20]], i16 [[TMP22]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE8]] -; CHECK: [[PRED_LOAD_CONTINUE8]]: -; CHECK-NEXT: [[TMP24:%.*]] = phi <2 x i16> [ [[TMP20]], %[[PRED_LOAD_CONTINUE6]] ], [ [[TMP23]], %[[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[COND]], <2 x i16> [[TMP24]], <2 x i16> [[TMP16]] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1 -; CHECK-NEXT: store i16 [[TMP25]], ptr [[DST]], align 2, !alias.scope [[META80:![0-9]+]], !noalias [[META77]] +; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META77:![0-9]+]] +; CHECK-NEXT: store i16 [[TMP5]], ptr [[DST]], align 2, !alias.scope [[META80:![0-9]+]], !noalias [[META77]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP82:![0-9]+]]