From 371302a05aa3db2e8116682e6f521a7660f7b87e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 11 Nov 2025 11:37:55 +0000 Subject: [PATCH 1/2] [VPlan] Hoist predicated loads with replicate recipes This commit implements hoisting of predicated loads that are executed on both paths with complementary predicates (P and NOT P). When such loads access the same address, they can be hoisted to the loop entry as a single unpredicated load, eliminating branching overhead. Key features: - Uses SCEV to group loads by address, handling different GEP instructions that compute the same address - Checks for complementary masks (P and NOT P) - Clones address computations when needed to maintain SSA form - Hoists as unpredicated VPReplicateRecipe (no widening yet) Simp --- .../Transforms/Vectorize/LoopVectorize.cpp | 1 + .../Transforms/Vectorize/VPlanTransforms.cpp | 147 ++++++++++ .../Transforms/Vectorize/VPlanTransforms.h | 6 + ...predicated-loads-with-predicated-stores.ll | 210 +++++--------- .../LoopVectorize/hoist-predicated-loads.ll | 268 +++--------------- 5 files changed, 263 insertions(+), 369 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 267be2e243fc3..1dac31c5b3eb3 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8317,6 +8317,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, if (auto Plan = tryToBuildVPlanWithVPRecipes( std::unique_ptr(VPlan0->duplicate()), SubRange, &LVer)) { // Now optimize the initial VPlan. + VPlanTransforms::hoistPredicatedLoads(*Plan, *PSE.getSE(), OrigLoop); VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths, *Plan, CM.getMinimalBitwidths()); VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 25557f1d5d651..158fcd5370473 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -42,6 +42,8 @@ #include "llvm/Support/TypeSize.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" +#define DEBUG_TYPE "loop-vectorize" + using namespace llvm; using namespace VPlanPatternMatch; @@ -3974,6 +3976,151 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) { } } +// Returns the intersection of metadata from a group of loads. +static VPIRMetadata getCommonLoadMetadata(ArrayRef Loads) { + VPIRMetadata CommonMetadata = *Loads.front(); + for (VPReplicateRecipe *Load : drop_begin(Loads)) + CommonMetadata.intersect(*Load); + return CommonMetadata; +} + +// Check if a load can be hoisted by verifying it doesn't alias with any stores +// in blocks between FirstBB and LastBB using scoped noalias metadata. +static bool canHoistLoadWithNoAliasCheck(VPReplicateRecipe *Load, + VPBasicBlock *FirstBB, + VPBasicBlock *LastBB) { + // Get the load's memory location and check if it aliases with any stores + // using scoped noalias metadata. + auto LoadLoc = vputils::getMemoryLocation(*Load); + if (!LoadLoc || !LoadLoc->AATags.Scope) + return false; + + const AAMDNodes &LoadAA = LoadLoc->AATags; + for (VPBlockBase *Block = FirstBB; Block; + Block = Block->getSingleSuccessor()) { + // This function assumes a simple linear chain of blocks. If there are + // multiple successors, we would need more complex analysis. + assert(Block->getNumSuccessors() <= 1 && + "Expected at most one successor in block chain"); + auto *VPBB = cast(Block); + for (VPRecipeBase &R : *VPBB) { + if (R.mayWriteToMemory()) { + auto Loc = vputils::getMemoryLocation(R); + // Bail out if we can't get the location or if the scoped noalias + // metadata indicates potential aliasing. + if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes( + LoadAA.Scope, Loc->AATags.NoAlias)) + return false; + } + } + + if (Block == LastBB) + break; + } + return true; +} + +void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, + const Loop *L) { + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + VPTypeAnalysis TypeInfo(Plan); + VPDominatorTree VPDT(Plan); + + // Group predicated loads by their address SCEV. + MapVector> LoadsByAddress; + for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) { + auto *VPBB = cast(Block); + for (VPRecipeBase &R : *VPBB) { + auto *RepR = dyn_cast(&R); + if (!RepR || RepR->getOpcode() != Instruction::Load || + !RepR->isPredicated()) + continue; + + VPValue *Addr = RepR->getOperand(0); + const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L); + if (!isa(AddrSCEV)) + LoadsByAddress[AddrSCEV].push_back(RepR); + } + } + + // For each address, collect loads with complementary masks, sort by + // dominance, and use the earliest load. + for (auto &[Addr, Loads] : LoadsByAddress) { + if (Loads.size() < 2) + continue; + + // Collect groups of loads with complementary masks. + SmallVector> LoadGroups; + for (VPReplicateRecipe *&LoadI : Loads) { + if (!LoadI) + continue; + + VPValue *MaskI = LoadI->getMask(); + Type *TypeI = TypeInfo.inferScalarType(LoadI); + SmallVector Group; + Group.push_back(LoadI); + LoadI = nullptr; + + // Find all loads with the same type. + for (VPReplicateRecipe *&LoadJ : Loads) { + if (!LoadJ) + continue; + + Type *TypeJ = TypeInfo.inferScalarType(LoadJ); + if (TypeI == TypeJ) { + Group.push_back(LoadJ); + LoadJ = nullptr; + } + } + + // Check if any load in the group has a complementary mask with another, + // that is M1 == NOT(M2) or M2 == NOT(M1). + bool HasComplementaryMask = + any_of(drop_begin(Group), [MaskI](VPReplicateRecipe *Load) { + VPValue *MaskJ = Load->getMask(); + return match(MaskI, m_Not(m_Specific(MaskJ))) || + match(MaskJ, m_Not(m_Specific(MaskI))); + }); + + if (HasComplementaryMask) + LoadGroups.push_back(std::move(Group)); + } + + // For each group, check memory dependencies and hoist the earliest load. + for (auto &Group : LoadGroups) { + // Sort loads by dominance order, with earliest (most dominating) first. + sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) { + return VPDT.properlyDominates(A, B); + }); + + VPReplicateRecipe *EarliestLoad = Group.front(); + VPBasicBlock *FirstBB = EarliestLoad->getParent(); + VPBasicBlock *LastBB = Group.back()->getParent(); + + // Check that the load doesn't alias with stores between first and last. + if (!canHoistLoadWithNoAliasCheck(EarliestLoad, FirstBB, LastBB)) + continue; + + // Collect common metadata from all loads in the group. + VPIRMetadata CommonMetadata = getCommonLoadMetadata(Group); + + // Create an unpredicated version of the earliest load with common + // metadata. + auto *UnpredicatedLoad = new VPReplicateRecipe( + EarliestLoad->getUnderlyingInstr(), {EarliestLoad->getOperand(0)}, + /*IsSingleScalar=*/false, /*Mask=*/nullptr, *EarliestLoad, CommonMetadata); + + UnpredicatedLoad->insertBefore(EarliestLoad); + + // Replace all loads in the group with the unpredicated load. + for (VPReplicateRecipe *Load : Group) { + Load->replaceAllUsesWith(UnpredicatedLoad); + Load->eraseFromParent(); + } + } + } +} + void VPlanTransforms::materializeConstantVectorTripCount( VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 708ea4185e1cb..83310a7839244 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -314,6 +314,12 @@ struct VPlanTransforms { /// plan using noalias metadata. static void hoistInvariantLoads(VPlan &Plan); + /// Hoist predicated loads from the same address to the loop entry block, if + /// they are guaranteed to execute on both paths (i.e., in replicate regions + /// with complementary masks P and NOT P). + static void hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, + const Loop *L); + // Materialize vector trip counts for constants early if it can simply be // computed as (Original TC / VF * UF) * VF * UF. static void diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll index ac767c68e0b25..87942911e915f 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll @@ -21,32 +21,20 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE17:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META3:![0-9]+]] -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP12]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] -; CHECK: [[PRED_LOAD_IF6]]: ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META3:![0-9]+]] ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META3]] +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP16]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] -; CHECK: [[PRED_LOAD_CONTINUE7]]: -; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], %[[PRED_LOAD_IF6]] ] -; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP18]], splat (i32 5) +; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP17]], splat (i32 5) ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: @@ -56,48 +44,30 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] -; CHECK: [[PRED_STORE_IF8]]: +; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] +; CHECK: [[PRED_STORE_IF6]]: ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1 ; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !alias.scope [[META5]], !noalias [[META7]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] -; CHECK: [[PRED_STORE_CONTINUE9]]: -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11:.*]] -; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META3]] -; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> poison, i32 [[TMP28]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_CONTINUE11]]: -; CHECK-NEXT: [[TMP30:%.*]] = phi <2 x i32> [ poison, %[[PRED_STORE_CONTINUE9]] ], [ [[TMP29]], %[[PRED_LOAD_IF10]] ] -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]] -; CHECK: [[PRED_LOAD_IF12]]: -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4, !alias.scope [[META3]] -; CHECK-NEXT: [[TMP34:%.*]] = insertelement <2 x i32> [[TMP30]], i32 [[TMP33]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE13]] -; CHECK: [[PRED_LOAD_CONTINUE13]]: -; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ [[TMP30]], %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP34]], %[[PRED_LOAD_IF12]] ] -; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP35]], splat (i32 10) +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] +; CHECK: [[PRED_STORE_CONTINUE7]]: +; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP17]], splat (i32 10) ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] -; CHECK: [[PRED_STORE_IF14]]: +; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] +; CHECK: [[PRED_STORE_IF8]]: ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP36]], i32 0 ; CHECK-NEXT: store i32 [[TMP39]], ptr [[TMP38]], align 4, !alias.scope [[META5]], !noalias [[META7]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE15]] -; CHECK: [[PRED_STORE_CONTINUE15]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] +; CHECK: [[PRED_STORE_CONTINUE9]]: ; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17]] -; CHECK: [[PRED_STORE_IF16]]: +; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]] +; CHECK: [[PRED_STORE_IF10]]: ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[TMP36]], i32 1 ; CHECK-NEXT: store i32 [[TMP42]], ptr [[TMP41]], align 4, !alias.scope [[META5]], !noalias [[META7]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE17]] -; CHECK: [[PRED_STORE_CONTINUE17]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]] +; CHECK: [[PRED_STORE_CONTINUE11]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -319,7 +289,7 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] @@ -327,79 +297,59 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP4]] ; CHECK-NEXT: store i32 10, ptr [[TMP10]], align 4, !alias.scope [[META25:![0-9]+]], !noalias [[META27:![0-9]+]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !alias.scope [[META30:![0-9]+]] -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP12]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP13]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] +; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]] -; CHECK: [[PRED_LOAD_IF19]]: +; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]] +; CHECK: [[PRED_STORE_IF19]]: ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP5]] ; CHECK-NEXT: store i32 10, ptr [[TMP16]], align 4, !alias.scope [[META25]], !noalias [[META27]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]] +; CHECK: [[PRED_STORE_CONTINUE20]]: +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META30:![0-9]+]] ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META30]] +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE20]] -; CHECK: [[PRED_LOAD_CONTINUE20]]: -; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ [[TMP14]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP19]], %[[PRED_LOAD_IF19]] ] -; CHECK-NEXT: [[TMP21:%.*]] = sub <2 x i32> [[TMP20]], splat (i32 5) +; CHECK-NEXT: [[TMP21:%.*]] = sub <2 x i32> [[TMP19]], splat (i32 5) ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] +; CHECK: [[PRED_STORE_IF21]]: ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0 ; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META32:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] +; CHECK: [[PRED_STORE_CONTINUE22]]: ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] -; CHECK: [[PRED_STORE_IF21]]: +; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] +; CHECK: [[PRED_STORE_IF23]]: ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP21]], i32 1 ; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !alias.scope [[META31]], !noalias [[META32]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] -; CHECK: [[PRED_STORE_CONTINUE22]]: -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]] -; CHECK: [[PRED_LOAD_IF23]]: -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4, !alias.scope [[META30]] -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> poison, i32 [[TMP30]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE24]] -; CHECK: [[PRED_LOAD_CONTINUE24]]: -; CHECK-NEXT: [[TMP32:%.*]] = phi <2 x i32> [ poison, %[[PRED_STORE_CONTINUE22]] ], [ [[TMP31]], %[[PRED_LOAD_IF23]] ] -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP33]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]] -; CHECK: [[PRED_LOAD_IF25]]: -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4, !alias.scope [[META30]] -; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i32> [[TMP32]], i32 [[TMP35]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE26]] -; CHECK: [[PRED_LOAD_CONTINUE26]]: -; CHECK-NEXT: [[TMP37:%.*]] = phi <2 x i32> [ [[TMP32]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP36]], %[[PRED_LOAD_IF25]] ] -; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP37]], splat (i32 10) +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]] +; CHECK: [[PRED_STORE_CONTINUE24]]: +; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) ; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]] -; CHECK: [[PRED_STORE_IF27]]: +; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]] +; CHECK: [[PRED_STORE_IF25]]: ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x i32> [[TMP38]], i32 0 ; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP40]], align 4, !alias.scope [[META31]], !noalias [[META32]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]] -; CHECK: [[PRED_STORE_CONTINUE28]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]] +; CHECK: [[PRED_STORE_CONTINUE26]]: ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30]] -; CHECK: [[PRED_STORE_IF29]]: +; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28]] +; CHECK: [[PRED_STORE_IF27]]: ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[TMP38]], i32 1 ; CHECK-NEXT: store i32 [[TMP44]], ptr [[TMP43]], align 4, !alias.scope [[META31]], !noalias [[META32]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE30]] -; CHECK: [[PRED_STORE_CONTINUE30]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]] +; CHECK: [[PRED_STORE_CONTINUE28]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP45]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] @@ -609,7 +559,7 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr % ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE26:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] @@ -617,62 +567,56 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr % ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP4]] ; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4, !alias.scope [[META48:![0-9]+]], !noalias [[META50:![0-9]+]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !alias.scope [[META53:![0-9]+]] -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP12]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP13]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] +; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]] -; CHECK: [[PRED_LOAD_IF19]]: +; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]] +; CHECK: [[PRED_STORE_IF19]]: ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP5]] ; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4, !alias.scope [[META48]], !noalias [[META50]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]] +; CHECK: [[PRED_STORE_CONTINUE20]]: +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META53:![0-9]+]] ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META53]] +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE20]] -; CHECK: [[PRED_LOAD_CONTINUE20]]: -; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ [[TMP14]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP19]], %[[PRED_LOAD_IF19]] ] -; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP20]], splat (i32 10) +; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] +; CHECK: [[PRED_STORE_IF21]]: ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0 ; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META54:![0-9]+]], !noalias [[META55:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] +; CHECK: [[PRED_STORE_CONTINUE22]]: ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] -; CHECK: [[PRED_STORE_IF21]]: +; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] +; CHECK: [[PRED_STORE_IF23]]: ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP21]], i32 1 ; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !alias.scope [[META54]], !noalias [[META55]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] -; CHECK: [[PRED_STORE_CONTINUE22]]: -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] -; CHECK: [[PRED_STORE_IF23]]: -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP29]], align 4, !alias.scope [[META53]] -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store i32 [[TMP34]], ptr [[TMP31]], align 4, !alias.scope [[META54]], !noalias [[META55]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]] ; CHECK: [[PRED_STORE_CONTINUE24]]: -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 +; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]] ; CHECK: [[PRED_STORE_IF25]]: -; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP33]], align 4, !alias.scope [[META53]] -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: store i32 [[TMP30]], ptr [[TMP35]], align 4, !alias.scope [[META54]], !noalias [[META55]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP31]], align 4, !alias.scope [[META54]], !noalias [[META55]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]] ; CHECK: [[PRED_STORE_CONTINUE26]]: +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 +; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28]] +; CHECK: [[PRED_STORE_IF27]]: +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] +; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP35]], align 4, !alias.scope [[META54]], !noalias [[META55]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]] +; CHECK: [[PRED_STORE_CONTINUE28]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP56:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll index e4c893f5269bb..c98dadb82ca95 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll @@ -21,51 +21,20 @@ define void @test(ptr %dst, ptr %src, ptr %cond) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x ptr> [[TMP10]], ptr [[TMP9]], i32 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP34:%.*]] = xor <2 x i1> [[TMP15]], splat (i1 true) -; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i1> [[TMP34]], i32 0 -; CHECK-NEXT: br i1 [[TMP35]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META3:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META3]] ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP19]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[TMP34]], i32 1 -; CHECK-NEXT: br i1 [[TMP21]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] -; CHECK: [[PRED_LOAD_IF6]]: -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META3]] -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> [[TMP20]], i32 [[TMP22]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] -; CHECK: [[PRED_LOAD_CONTINUE7]]: -; CHECK-NEXT: [[TMP36:%.*]] = phi <2 x i32> [ [[TMP20]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP23]], %[[PRED_LOAD_IF6]] ] +; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP7]], i32 1 ; CHECK-NEXT: [[TMP25:%.*]] = add <2 x i32> [[TMP36]], splat (i32 10) -; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 -; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] -; CHECK: [[PRED_LOAD_IF8]]: -; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META3]] -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> poison, i32 [[TMP26]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]] -; CHECK: [[PRED_LOAD_CONTINUE9]]: -; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE7]] ], [ [[TMP31]], %[[PRED_LOAD_IF8]] ] -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 -; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META3]] -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> [[TMP33]], i32 [[TMP27]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_CONTINUE11]]: -; CHECK-NEXT: [[TMP29:%.*]] = phi <2 x i32> [ [[TMP33]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP28]], %[[PRED_LOAD_IF10]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP29]], <2 x i32> [[TMP25]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP36]], <2 x i32> [[TMP25]] ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP37]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -471,51 +440,20 @@ define void @different_alignments_same_address(ptr %dst, ptr %src, ptr %cond) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x ptr> [[TMP10]], ptr [[TMP9]], i32 1 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP20]], align 4, !alias.scope [[META36:![0-9]+]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i1> [[TMP15]], splat (i1 true) -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP16]], i32 0 -; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META39:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META39]] ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP19]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[TMP16]], i32 1 -; CHECK-NEXT: br i1 [[TMP21]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] -; CHECK: [[PRED_LOAD_IF6]]: -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META39]] -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> [[TMP35]], i32 [[TMP22]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] -; CHECK: [[PRED_LOAD_CONTINUE7]]: -; CHECK-NEXT: [[TMP24:%.*]] = phi <2 x i32> [ [[TMP35]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP23]], %[[PRED_LOAD_IF6]] ] +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP7]], i32 1 ; CHECK-NEXT: [[TMP25:%.*]] = add <2 x i32> [[TMP24]], splat (i32 10) -; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 -; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] -; CHECK: [[PRED_LOAD_IF8]]: -; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP8]], align 2, !alias.scope [[META39]] -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> poison, i32 [[TMP26]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]] -; CHECK: [[PRED_LOAD_CONTINUE9]]: -; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE7]] ], [ [[TMP31]], %[[PRED_LOAD_IF8]] ] -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 -; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP9]], align 2, !alias.scope [[META39]] -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> [[TMP33]], i32 [[TMP27]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_CONTINUE11]]: -; CHECK-NEXT: [[TMP29:%.*]] = phi <2 x i32> [ [[TMP33]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP28]], %[[PRED_LOAD_IF10]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP29]], <2 x i32> [[TMP25]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP24]], <2 x i32> [[TMP25]] ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP34]], align 4, !alias.scope [[META41:![0-9]+]], !noalias [[META43:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -642,50 +580,19 @@ define void @duplicate_gep(ptr %dst, ptr %src, ptr %cond) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META46:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META49:![0-9]+]] -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP12]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] -; CHECK: [[PRED_LOAD_IF6]]: ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META49]] -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP16]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] -; CHECK: [[PRED_LOAD_CONTINUE7]]: -; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], %[[PRED_LOAD_IF6]] ] -; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[TMP18]], splat (i32 10) -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] -; CHECK: [[PRED_LOAD_IF8]]: -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4, !alias.scope [[META49]] +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META49:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META49]] ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[TMP22]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]] -; CHECK: [[PRED_LOAD_CONTINUE9]]: -; CHECK-NEXT: [[TMP24:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE7]] ], [ [[TMP23]], %[[PRED_LOAD_IF8]] ] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !alias.scope [[META49]] -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> [[TMP24]], i32 [[TMP27]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_CONTINUE11]]: -; CHECK-NEXT: [[TMP29:%.*]] = phi <2 x i32> [ [[TMP24]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP28]], %[[PRED_LOAD_IF10]] ] +; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[TMP29]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP29]], <2 x i32> [[TMP19]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4, !alias.scope [[META51:![0-9]+]], !noalias [[META53:![0-9]+]] @@ -752,50 +659,19 @@ define void @non_unit_stride_i64(ptr %dst, ptr %src, ptr %cond) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP6]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META56:![0-9]+]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true) -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 -; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META59:![0-9]+]] -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP13]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP14]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 -; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] -; CHECK: [[PRED_LOAD_IF6]]: ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP7]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META59]] -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP18]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] -; CHECK: [[PRED_LOAD_CONTINUE7]]: -; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ [[TMP15]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP19]], %[[PRED_LOAD_IF6]] ] -; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP20]], splat (i32 10) -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 -; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] -; CHECK: [[PRED_LOAD_IF8]]: -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4, !alias.scope [[META59]] +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META59:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META59]] ; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> poison, i32 [[TMP24]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]] -; CHECK: [[PRED_LOAD_CONTINUE9]]: -; CHECK-NEXT: [[TMP26:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE7]] ], [ [[TMP25]], %[[PRED_LOAD_IF8]] ] -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1 -; CHECK-NEXT: br i1 [[TMP27]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP7]] -; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4, !alias.scope [[META59]] -; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x i32> [[TMP26]], i32 [[TMP29]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] -; CHECK: [[PRED_LOAD_CONTINUE11]]: -; CHECK-NEXT: [[TMP31:%.*]] = phi <2 x i32> [ [[TMP26]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP30]], %[[PRED_LOAD_IF10]] ] +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> [[TMP25]], i32 [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP31]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[TMP31]], <2 x i32> [[TMP21]] ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP6]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP32]], align 4, !alias.scope [[META61:![0-9]+]], !noalias [[META63:![0-9]+]] @@ -1045,55 +921,15 @@ define void @hoist_predicated_load_with_chained_geps1(ptr %dst, ptr %src, i1 %co ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[COND]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE8:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 -; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 8 -; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 2, !alias.scope [[META68:![0-9]+]] -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i16> poison, i16 [[TMP6]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP7]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] -; CHECK: [[PRED_LOAD_IF3]]: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i64 8 -; CHECK-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP11]], align 2, !alias.scope [[META68]] -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP12]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]] -; CHECK: [[PRED_LOAD_CONTINUE4]]: -; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP8]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], %[[PRED_LOAD_IF3]] ] -; CHECK-NEXT: br i1 [[COND]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] -; CHECK: [[PRED_LOAD_IF5]]: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i64 8 -; CHECK-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP16]], align 2, !alias.scope [[META68]] -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i16> poison, i16 [[TMP17]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] -; CHECK: [[PRED_LOAD_CONTINUE6]]: -; CHECK-NEXT: [[TMP19:%.*]] = phi <2 x i16> [ poison, %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP18]], %[[PRED_LOAD_IF5]] ] -; CHECK-NEXT: br i1 [[COND]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8]] -; CHECK: [[PRED_LOAD_IF7]]: ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i64 8 -; CHECK-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META68]] -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i16> [[TMP19]], i16 [[TMP22]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE8]] -; CHECK: [[PRED_LOAD_CONTINUE8]]: -; CHECK-NEXT: [[TMP24:%.*]] = phi <2 x i16> [ [[TMP19]], %[[PRED_LOAD_CONTINUE6]] ], [ [[TMP23]], %[[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[COND]], <2 x i16> [[TMP24]], <2 x i16> [[TMP14]] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1 -; CHECK-NEXT: store i16 [[TMP25]], ptr [[DST]], align 2, !alias.scope [[META71:![0-9]+]], !noalias [[META68]] +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META68:![0-9]+]] +; CHECK-NEXT: store i16 [[TMP4]], ptr [[DST]], align 2, !alias.scope [[META71:![0-9]+]], !noalias [[META68]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP73:![0-9]+]] @@ -1145,55 +981,15 @@ define void @hoist_predicated_load_with_chained_geps2(ptr %dst, ptr %src, i1 %co ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[COND]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE8:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> [[TMP5]], ptr [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 -; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8 -; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP8]], align 2, !alias.scope [[META75:![0-9]+]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i16> poison, i16 [[TMP9]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP10]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 -; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] -; CHECK: [[PRED_LOAD_IF3]]: -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP4]], i64 8 -; CHECK-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP13]], align 2, !alias.scope [[META75]] -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i16> [[TMP11]], i16 [[TMP14]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]] -; CHECK: [[PRED_LOAD_CONTINUE4]]: -; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i16> [ [[TMP11]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP15]], %[[PRED_LOAD_IF3]] ] -; CHECK-NEXT: br i1 [[COND]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] -; CHECK: [[PRED_LOAD_IF5]]: -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8 -; CHECK-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP17]], align 2, !alias.scope [[META75]] -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i16> poison, i16 [[TMP18]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] -; CHECK: [[PRED_LOAD_CONTINUE6]]: -; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i16> [ poison, %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP19]], %[[PRED_LOAD_IF5]] ] -; CHECK-NEXT: br i1 [[COND]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8]] -; CHECK: [[PRED_LOAD_IF7]]: ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP4]], i64 8 -; CHECK-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META75]] -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i16> [[TMP20]], i16 [[TMP22]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE8]] -; CHECK: [[PRED_LOAD_CONTINUE8]]: -; CHECK-NEXT: [[TMP24:%.*]] = phi <2 x i16> [ [[TMP20]], %[[PRED_LOAD_CONTINUE6]] ], [ [[TMP23]], %[[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[COND]], <2 x i16> [[TMP24]], <2 x i16> [[TMP16]] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1 -; CHECK-NEXT: store i16 [[TMP25]], ptr [[DST]], align 2, !alias.scope [[META78:![0-9]+]], !noalias [[META75]] +; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META75:![0-9]+]] +; CHECK-NEXT: store i16 [[TMP5]], ptr [[DST]], align 2, !alias.scope [[META78:![0-9]+]], !noalias [[META75]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP80:![0-9]+]] @@ -1262,7 +1058,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 ; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META62:![0-9]+]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META82:![0-9]+]] ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] ; CHECK: [[PRED_LOAD_CONTINUE]]: @@ -1270,7 +1066,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 ; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF2:.*]], label %[[PRED_LOAD_CONTINUE3:.*]] ; CHECK: [[PRED_LOAD_IF2]]: -; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META62]] +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META82]] ; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP21]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE3]] ; CHECK: [[PRED_LOAD_CONTINUE3]]: @@ -1280,7 +1076,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP25]], i32 0 ; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF4:.*]], label %[[PRED_LOAD_CONTINUE5:.*]] ; CHECK: [[PRED_LOAD_IF4]]: -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META62]] +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META82]] ; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> poison, i32 [[TMP27]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE5]] ; CHECK: [[PRED_LOAD_CONTINUE5]]: @@ -1288,7 +1084,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP25]], i32 1 ; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]] ; CHECK: [[PRED_LOAD_IF6]]: -; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META62]] +; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META82]] ; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x i32> [[TMP29]], i32 [[TMP31]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] ; CHECK: [[PRED_LOAD_CONTINUE7]]: @@ -1297,7 +1093,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0 ; CHECK-NEXT: br i1 [[TMP35]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] ; CHECK: [[PRED_LOAD_IF8]]: -; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META62]] +; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META82]] ; CHECK-NEXT: [[TMP37:%.*]] = insertelement <2 x i32> poison, i32 [[TMP36]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]] ; CHECK: [[PRED_LOAD_CONTINUE9]]: @@ -1305,7 +1101,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1 ; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11]] ; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META62]] +; CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META82]] ; CHECK-NEXT: [[TMP41:%.*]] = insertelement <2 x i32> [[TMP38]], i32 [[TMP40]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] ; CHECK: [[PRED_LOAD_CONTINUE11]]: @@ -1313,10 +1109,10 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP25]], <2 x i32> [[TMP34]], <2 x i32> [[TMP24]] ; CHECK-NEXT: [[PREDPHI16:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP42]], <2 x i32> [[PREDPHI]] ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store <2 x i32> [[PREDPHI16]], ptr [[TMP43]], align 4, !alias.scope [[META65:![0-9]+]], !noalias [[META62]] +; CHECK-NEXT: store <2 x i32> [[PREDPHI16]], ptr [[TMP43]], align 4, !alias.scope [[META85:![0-9]+]], !noalias [[META82]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 -; CHECK-NEXT: br i1 [[TMP44]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP67:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP44]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP87:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] ; CHECK: [[SCALAR_PH]]: From d5ec62e37a8ea9cb8ce3b48b86dc6a2181a2f825 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 19 Nov 2025 16:23:28 +0000 Subject: [PATCH 2/2] [VPlan] Sink predicated stores with complementary masks. Extend the logic to hoist predicated loads (https://github.com/llvm/llvm-project/pull/168373) to sink predicated stores with complementary masks in a similar fashion. The patch refactors some of the existing logic for legality checks to be shared between hosting and sinking, and adds a new sinking transform on top. With respect to the legality checks, for sinking stores the code also checks if there are any aliasing stores that may alias, not only loads. --- .../Transforms/Vectorize/LoopVectorize.cpp | 1 + .../Transforms/Vectorize/VPlanTransforms.cpp | 314 +++++++++++++----- .../Transforms/Vectorize/VPlanTransforms.h | 7 + ...predicated-loads-with-predicated-stores.ll | 274 +++++---------- 4 files changed, 315 insertions(+), 281 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1dac31c5b3eb3..96294727514e2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8318,6 +8318,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, std::unique_ptr(VPlan0->duplicate()), SubRange, &LVer)) { // Now optimize the initial VPlan. VPlanTransforms::hoistPredicatedLoads(*Plan, *PSE.getSE(), OrigLoop); + VPlanTransforms::sinkPredicatedStores(*Plan, *PSE.getSE(), OrigLoop); VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths, *Plan, CM.getMinimalBitwidths()); VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 158fcd5370473..74de9c5cbdac4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -3976,42 +3976,67 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) { } } -// Returns the intersection of metadata from a group of loads. -static VPIRMetadata getCommonLoadMetadata(ArrayRef Loads) { - VPIRMetadata CommonMetadata = *Loads.front(); - for (VPReplicateRecipe *Load : drop_begin(Loads)) - CommonMetadata.intersect(*Load); +// Collect common metadata from a group of replicate recipes by intersecting +// metadata from all recipes in the group. +static VPIRMetadata getCommonMetadata(ArrayRef Recipes) { + VPIRMetadata CommonMetadata = *Recipes.front(); + for (VPReplicateRecipe *Recipe : drop_begin(Recipes)) + CommonMetadata.intersect(*Recipe); return CommonMetadata; } -// Check if a load can be hoisted by verifying it doesn't alias with any stores -// in blocks between FirstBB and LastBB using scoped noalias metadata. -static bool canHoistLoadWithNoAliasCheck(VPReplicateRecipe *Load, - VPBasicBlock *FirstBB, - VPBasicBlock *LastBB) { - // Get the load's memory location and check if it aliases with any stores - // using scoped noalias metadata. - auto LoadLoc = vputils::getMemoryLocation(*Load); - if (!LoadLoc || !LoadLoc->AATags.Scope) +// Helper to check if we can prove no aliasing using scoped noalias metadata. +static bool canProveNoAlias(const AAMDNodes &AA1, const AAMDNodes &AA2) { + return AA1.Scope && AA2.NoAlias && + !ScopedNoAliasAAResult::mayAliasInScopes(AA1.Scope, AA2.NoAlias); +} + +// Check if a memory operation doesn't alias with memory operations in blocks +// between FirstBB and LastBB using scoped noalias metadata. +// For load hoisting, we only check writes in one direction. +// For store sinking, we check both reads and writes bidirectionally. +static bool canHoistOrSinkWithNoAliasCheck( + const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB, + bool CheckReads, + const SmallPtrSetImpl *ExcludeRecipes = nullptr) { + if (!MemLoc.AATags.Scope) return false; - const AAMDNodes &LoadAA = LoadLoc->AATags; + const AAMDNodes &MemAA = MemLoc.AATags; + for (VPBlockBase *Block = FirstBB; Block; Block = Block->getSingleSuccessor()) { - // This function assumes a simple linear chain of blocks. If there are - // multiple successors, we would need more complex analysis. assert(Block->getNumSuccessors() <= 1 && "Expected at most one successor in block chain"); auto *VPBB = cast(Block); for (VPRecipeBase &R : *VPBB) { - if (R.mayWriteToMemory()) { - auto Loc = vputils::getMemoryLocation(R); - // Bail out if we can't get the location or if the scoped noalias - // metadata indicates potential aliasing. - if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes( - LoadAA.Scope, Loc->AATags.NoAlias)) - return false; + if (ExcludeRecipes && ExcludeRecipes->contains(&R)) + continue; + + // Skip recipes that don't need checking. + if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory())) + continue; + + auto Loc = vputils::getMemoryLocation(R); + if (!Loc) + // Conservatively assume aliasing for memory operations without + // location. We already filtered by + // mayWriteToMemory()/mayReadFromMemory() above. + return false; + + // Check for aliasing using scoped noalias metadata. + // For store sinking with CheckReads, we can prove no aliasing + // bidirectionally (either direction suffices). + if (CheckReads) { + if (canProveNoAlias(Loc->AATags, MemAA) || + canProveNoAlias(MemAA, Loc->AATags)) + continue; } + + // Check if the memory operations may alias in the standard direction. + if (ScopedNoAliasAAResult::mayAliasInScopes(MemAA.Scope, + Loc->AATags.NoAlias)) + return false; } if (Block == LastBB) @@ -4020,103 +4045,222 @@ static bool canHoistLoadWithNoAliasCheck(VPReplicateRecipe *Load, return true; } -void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, - const Loop *L) { +template +static SmallVector> +collectComplementaryPredicatedMemOps(VPlan &Plan, ScalarEvolution &SE, + const Loop *L) { + static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store, + "Only Load and Store opcodes supported"); + constexpr bool IsLoad = (Opcode == Instruction::Load); VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); VPTypeAnalysis TypeInfo(Plan); - VPDominatorTree VPDT(Plan); - // Group predicated loads by their address SCEV. - MapVector> LoadsByAddress; + // Group predicated operations by their address SCEV. + MapVector> RecipesByAddress; for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) { auto *VPBB = cast(Block); for (VPRecipeBase &R : *VPBB) { auto *RepR = dyn_cast(&R); - if (!RepR || RepR->getOpcode() != Instruction::Load || - !RepR->isPredicated()) + if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated()) continue; - VPValue *Addr = RepR->getOperand(0); + // For loads, operand 0 is address; for stores, operand 1 is address. + VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1); const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L); if (!isa(AddrSCEV)) - LoadsByAddress[AddrSCEV].push_back(RepR); + RecipesByAddress[AddrSCEV].push_back(RepR); } } - // For each address, collect loads with complementary masks, sort by - // dominance, and use the earliest load. - for (auto &[Addr, Loads] : LoadsByAddress) { - if (Loads.size() < 2) + // For each address, collect operations with the same or complementary masks. + SmallVector> AllGroups; + for (auto &[Addr, Recipes] : RecipesByAddress) { + if (Recipes.size() < 2) continue; - // Collect groups of loads with complementary masks. - SmallVector> LoadGroups; - for (VPReplicateRecipe *&LoadI : Loads) { - if (!LoadI) + // Collect groups with the same or complementary masks. + for (VPReplicateRecipe *&RecipeI : Recipes) { + if (!RecipeI) continue; - VPValue *MaskI = LoadI->getMask(); - Type *TypeI = TypeInfo.inferScalarType(LoadI); + VPValue *MaskI = RecipeI->getMask(); + Type *TypeI = + TypeInfo.inferScalarType(IsLoad ? RecipeI : RecipeI->getOperand(0)); SmallVector Group; - Group.push_back(LoadI); - LoadI = nullptr; + Group.push_back(RecipeI); + RecipeI = nullptr; - // Find all loads with the same type. - for (VPReplicateRecipe *&LoadJ : Loads) { - if (!LoadJ) + // Find all operations with the same or complementary masks. + bool HasComplementaryMask = false; + for (VPReplicateRecipe *&RecipeJ : Recipes) { + if (!RecipeJ) continue; - Type *TypeJ = TypeInfo.inferScalarType(LoadJ); + VPValue *MaskJ = RecipeJ->getMask(); + Type *TypeJ = + TypeInfo.inferScalarType(IsLoad ? RecipeJ : RecipeJ->getOperand(0)); if (TypeI == TypeJ) { - Group.push_back(LoadJ); - LoadJ = nullptr; + // Check if any operation in the group has a complementary mask with + // another, that is M1 == NOT(M2) or M2 == NOT(M1). + HasComplementaryMask |= match(MaskI, m_Not(m_Specific(MaskJ))) || + match(MaskJ, m_Not(m_Specific(MaskI))); + Group.push_back(RecipeJ); + RecipeJ = nullptr; } } - // Check if any load in the group has a complementary mask with another, - // that is M1 == NOT(M2) or M2 == NOT(M1). - bool HasComplementaryMask = - any_of(drop_begin(Group), [MaskI](VPReplicateRecipe *Load) { - VPValue *MaskJ = Load->getMask(); - return match(MaskI, m_Not(m_Specific(MaskJ))) || - match(MaskJ, m_Not(m_Specific(MaskI))); - }); + if (HasComplementaryMask) { + assert(Group.size() >= 2 && "must have at least 2 entries"); + AllGroups.push_back(std::move(Group)); + } + } + } + + return AllGroups; +} + +void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, + const Loop *L) { + auto Groups = + collectComplementaryPredicatedMemOps(Plan, SE, L); + if (Groups.empty()) + return; + + VPDominatorTree VPDT(Plan); + + // Process each group of loads. + for (auto &Group : Groups) { + // Sort loads by dominance order, with earliest (most dominating) first. + sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) { + return VPDT.properlyDominates(A, B); + }); + + // Try to use the earliest (most dominating) load to replace all others. + VPReplicateRecipe *EarliestLoad = Group[0]; + VPBasicBlock *FirstBB = EarliestLoad->getParent(); + VPBasicBlock *LastBB = Group.back()->getParent(); + + // Check that the load doesn't alias with stores between first and last. + auto LoadLoc = vputils::getMemoryLocation(*EarliestLoad); + if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB, + /*CheckReads=*/false)) + continue; + + // Collect common metadata from all loads in the group. + VPIRMetadata CommonMetadata = getCommonMetadata(Group); + + // Create an unpredicated version of the earliest load with common + // metadata. + auto *UnpredicatedLoad = new VPReplicateRecipe( + EarliestLoad->getUnderlyingInstr(), {EarliestLoad->getOperand(0)}, + /*IsSingleScalar=*/false, /*Mask=*/nullptr, *EarliestLoad, + CommonMetadata); - if (HasComplementaryMask) - LoadGroups.push_back(std::move(Group)); + UnpredicatedLoad->insertBefore(EarliestLoad); + + // Replace all loads in the group with the unpredicated load. + for (VPReplicateRecipe *Load : Group) { + Load->replaceAllUsesWith(UnpredicatedLoad); + Load->eraseFromParent(); } + } +} - // For each group, check memory dependencies and hoist the earliest load. - for (auto &Group : LoadGroups) { - // Sort loads by dominance order, with earliest (most dominating) first. - sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) { - return VPDT.properlyDominates(A, B); - }); +static bool canSinkStoreWithNoAliasCheck( + VPReplicateRecipe *Store, ArrayRef StoresToSink, + const SmallPtrSetImpl *AlreadySunkStores = nullptr) { + auto StoreLoc = vputils::getMemoryLocation(*Store); + if (!StoreLoc) + return false; - VPReplicateRecipe *EarliestLoad = Group.front(); - VPBasicBlock *FirstBB = EarliestLoad->getParent(); - VPBasicBlock *LastBB = Group.back()->getParent(); + SmallPtrSet StoresToSinkSet(StoresToSink.begin(), + StoresToSink.end()); + if (AlreadySunkStores) + StoresToSinkSet.insert(AlreadySunkStores->begin(), + AlreadySunkStores->end()); - // Check that the load doesn't alias with stores between first and last. - if (!canHoistLoadWithNoAliasCheck(EarliestLoad, FirstBB, LastBB)) + VPBasicBlock *FirstBB = StoresToSink.front()->getParent(); + VPBasicBlock *LastBB = StoresToSink.back()->getParent(); + + if (StoreLoc->AATags.Scope) + return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB, + /*CheckReads=*/true, + &StoresToSinkSet); + + // Without alias scope metadata, we conservatively require no memory + // operations between the stores being sunk. + for (VPBlockBase *Block = FirstBB; Block; + Block = Block->getSingleSuccessor()) { + auto *VPBB = cast(Block); + for (VPRecipeBase &R : *VPBB) { + if (StoresToSinkSet.contains(&R)) continue; - // Collect common metadata from all loads in the group. - VPIRMetadata CommonMetadata = getCommonLoadMetadata(Group); + if (R.mayReadFromMemory() || R.mayWriteToMemory()) + return false; + } - // Create an unpredicated version of the earliest load with common - // metadata. - auto *UnpredicatedLoad = new VPReplicateRecipe( - EarliestLoad->getUnderlyingInstr(), {EarliestLoad->getOperand(0)}, - /*IsSingleScalar=*/false, /*Mask=*/nullptr, *EarliestLoad, CommonMetadata); + if (Block == LastBB) + break; + } - UnpredicatedLoad->insertBefore(EarliestLoad); + return true; +} - // Replace all loads in the group with the unpredicated load. - for (VPReplicateRecipe *Load : Group) { - Load->replaceAllUsesWith(UnpredicatedLoad); - Load->eraseFromParent(); - } +void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE, + const Loop *L) { + auto Groups = + collectComplementaryPredicatedMemOps(Plan, SE, L); + + if (Groups.empty()) + return; + + VPDominatorTree VPDT(Plan); + + // Track stores from all groups that have been successfully sunk to exclude + // them from alias checks for subsequent groups. + SmallPtrSet AlreadySunkStores; + + for (auto &Group : Groups) { + sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) { + return VPDT.properlyDominates(A, B); + }); + + if (!canSinkStoreWithNoAliasCheck(Group[0], Group, &AlreadySunkStores)) + continue; + + // Use the last (most dominated) store's location for the unconditional + // store. + VPReplicateRecipe *LastStore = Group.back(); + VPBasicBlock *InsertBB = LastStore->getParent(); + + // Collect common alias metadata from all stores in the group. + VPIRMetadata CommonMetadata = getCommonMetadata(Group); + + // Build select chain for stored values. + VPValue *SelectedValue = Group[0]->getOperand(0); + VPBuilder Builder(InsertBB, LastStore->getIterator()); + + for (unsigned I = 1; I < Group.size(); ++I) { + VPValue *Mask = Group[I]->getMask(); + VPValue *Value = Group[I]->getOperand(0); + SelectedValue = Builder.createSelect(Mask, Value, SelectedValue, + Group[I]->getDebugLoc()); + } + + // Create unconditional store with selected value and common metadata. + VPValue *AddrVPValue = Group[0]->getOperand(1); + SmallVector Operands = {SelectedValue, AddrVPValue}; + auto *SI = cast(Group[0]->getUnderlyingInstr()); + auto *UnpredicatedStore = + new VPReplicateRecipe(SI, Operands, /*IsSingleScalar=*/false, + /*Mask=*/nullptr, *LastStore, CommonMetadata); + UnpredicatedStore->insertBefore(*InsertBB, LastStore->getIterator()); + + // Track and remove all predicated stores from the group. + for (VPReplicateRecipe *Store : Group) { + AlreadySunkStores.insert(Store); + Store->eraseFromParent(); } } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 83310a7839244..f5f415f379915 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -320,6 +320,13 @@ struct VPlanTransforms { static void hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, const Loop *L); + /// Sink predicated stores to the same address with complementary predicates + /// (P and NOT P) to an unconditional store with select recipes for the + /// stored values. This eliminates branching overhead when all paths + /// unconditionally store to the same location. + static void sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE, + const Loop *L); + // Materialize vector trip counts for constants early if it can simply be // computed as (Original TC / VF * UF) * VF * UF. static void diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll index 87942911e915f..c065fe96ab26c 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll @@ -21,13 +21,12 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META3:![0-9]+]] @@ -35,39 +34,14 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP16]], i32 1 ; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP17]], splat (i32 5) -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 -; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] -; CHECK: [[PRED_STORE_IF6]]: ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1 -; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !alias.scope [[META5]], !noalias [[META7]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] -; CHECK: [[PRED_STORE_CONTINUE7]]: ; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP17]], splat (i32 10) -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] -; CHECK: [[PRED_STORE_IF8]]: -; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP36]], i32 0 -; CHECK-NEXT: store i32 [[TMP39]], ptr [[TMP38]], align 4, !alias.scope [[META5]], !noalias [[META7]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] -; CHECK: [[PRED_STORE_CONTINUE9]]: -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]] -; CHECK: [[PRED_STORE_IF10]]: -; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[TMP36]], i32 1 -; CHECK-NEXT: store i32 [[TMP42]], ptr [[TMP41]], align 4, !alias.scope [[META5]], !noalias [[META7]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]] -; CHECK: [[PRED_STORE_CONTINUE11]]: +; CHECK-NEXT: [[TMP14:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP36]], <2 x i32> [[TMP19]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP14]], i32 0 +; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP21]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP14]], i32 1 +; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP24]], align 4, !alias.scope [[META5]], !noalias [[META7]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -134,7 +108,7 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE21:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE15:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] @@ -162,57 +136,32 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) { ; CHECK: [[PRED_LOAD_CONTINUE11]]: ; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], %[[PRED_LOAD_IF10]] ] ; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP18]], splat (i32 5) -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 -; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !alias.scope [[META19:![0-9]+]], !noalias [[META12]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]] -; CHECK: [[PRED_STORE_IF12]]: ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1 -; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !alias.scope [[META19]], !noalias [[META12]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE13]] -; CHECK: [[PRED_STORE_CONTINUE13]]: ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15:.*]] -; CHECK: [[PRED_LOAD_IF14]]: +; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]] +; CHECK: [[PRED_LOAD_IF12]]: ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META15]], !noalias [[META17]] ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> poison, i32 [[TMP28]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE15]] -; CHECK: [[PRED_LOAD_CONTINUE15]]: -; CHECK-NEXT: [[TMP30:%.*]] = phi <2 x i32> [ poison, %[[PRED_STORE_CONTINUE13]] ], [ [[TMP29]], %[[PRED_LOAD_IF14]] ] +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE13]] +; CHECK: [[PRED_LOAD_CONTINUE13]]: +; CHECK-NEXT: [[TMP30:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP29]], %[[PRED_LOAD_IF12]] ] ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF16:.*]], label %[[PRED_LOAD_CONTINUE17:.*]] -; CHECK: [[PRED_LOAD_IF16]]: +; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15]] +; CHECK: [[PRED_LOAD_IF14]]: ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4, !alias.scope [[META15]], !noalias [[META17]] ; CHECK-NEXT: [[TMP34:%.*]] = insertelement <2 x i32> [[TMP30]], i32 [[TMP33]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE17]] -; CHECK: [[PRED_LOAD_CONTINUE17]]: -; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ [[TMP30]], %[[PRED_LOAD_CONTINUE15]] ], [ [[TMP34]], %[[PRED_LOAD_IF16]] ] +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE15]] +; CHECK: [[PRED_LOAD_CONTINUE15]]: +; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ [[TMP30]], %[[PRED_LOAD_CONTINUE13]] ], [ [[TMP34]], %[[PRED_LOAD_IF14]] ] ; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP35]], splat (i32 10) -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]] -; CHECK: [[PRED_STORE_IF18]]: -; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP36]], i32 0 -; CHECK-NEXT: store i32 [[TMP39]], ptr [[TMP38]], align 4, !alias.scope [[META19]], !noalias [[META12]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE19]] -; CHECK: [[PRED_STORE_CONTINUE19]]: -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21]] -; CHECK: [[PRED_STORE_IF20]]: -; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[TMP36]], i32 1 -; CHECK-NEXT: store i32 [[TMP42]], ptr [[TMP41]], align 4, !alias.scope [[META19]], !noalias [[META12]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE21]] -; CHECK: [[PRED_STORE_CONTINUE21]]: +; CHECK-NEXT: [[TMP37:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP36]], <2 x i32> [[TMP19]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i32> [[TMP37]], i32 0 +; CHECK-NEXT: store i32 [[TMP38]], ptr [[TMP21]], align 4, !alias.scope [[META19:![0-9]+]], !noalias [[META12]] +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP37]], i32 1 +; CHECK-NEXT: store i32 [[TMP39]], ptr [[TMP24]], align 4, !alias.scope [[META19]], !noalias [[META12]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -289,13 +238,12 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META22:![0-9]+]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: @@ -304,7 +252,7 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]] +; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20]] ; CHECK: [[PRED_STORE_IF19]]: ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP5]] ; CHECK-NEXT: store i32 10, ptr [[TMP16]], align 4, !alias.scope [[META25]], !noalias [[META27]] @@ -317,39 +265,14 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1 ; CHECK-NEXT: [[TMP21:%.*]] = sub <2 x i32> [[TMP19]], splat (i32 5) -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] -; CHECK: [[PRED_STORE_IF21]]: ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0 -; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META32:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] -; CHECK: [[PRED_STORE_CONTINUE22]]: -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] -; CHECK: [[PRED_STORE_IF23]]: ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP21]], i32 1 -; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !alias.scope [[META31]], !noalias [[META32]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]] -; CHECK: [[PRED_STORE_CONTINUE24]]: ; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]] -; CHECK: [[PRED_STORE_IF25]]: -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x i32> [[TMP38]], i32 0 -; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP40]], align 4, !alias.scope [[META31]], !noalias [[META32]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]] -; CHECK: [[PRED_STORE_CONTINUE26]]: -; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28]] -; CHECK: [[PRED_STORE_IF27]]: -; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[TMP38]], i32 1 -; CHECK-NEXT: store i32 [[TMP44]], ptr [[TMP43]], align 4, !alias.scope [[META31]], !noalias [[META32]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]] -; CHECK: [[PRED_STORE_CONTINUE28]]: +; CHECK-NEXT: [[TMP22:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP21]], <2 x i32> [[TMP38]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP22]], i32 0 +; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META32:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP22]], i32 1 +; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP26]], align 4, !alias.scope [[META31]], !noalias [[META32]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP45]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] @@ -418,7 +341,7 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE17:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE15:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] @@ -446,40 +369,31 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p ; CHECK: [[PRED_LOAD_CONTINUE11]]: ; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], %[[PRED_LOAD_IF10]] ] ; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[TMP18]], splat (i32 10) -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 -; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !alias.scope [[META42:![0-9]+]], !noalias [[META35]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]] -; CHECK: [[PRED_STORE_IF12]]: ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1 -; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !alias.scope [[META42]], !noalias [[META35]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE13]] -; CHECK: [[PRED_STORE_CONTINUE13]]: ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] -; CHECK: [[PRED_STORE_IF14]]: +; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]] +; CHECK: [[PRED_LOAD_IF12]]: ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META38]], !noalias [[META40]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store i32 [[TMP32]], ptr [[TMP29]], align 4, !alias.scope [[META42]], !noalias [[META35]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE15]] -; CHECK: [[PRED_STORE_CONTINUE15]]: +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META38]], !noalias [[META40]] +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[TMP20]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE13]] +; CHECK: [[PRED_LOAD_CONTINUE13]]: +; CHECK-NEXT: [[TMP22:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP23]], %[[PRED_LOAD_IF12]] ] ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17]] -; CHECK: [[PRED_STORE_IF16]]: +; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15]] +; CHECK: [[PRED_LOAD_IF14]]: ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP31]], align 4, !alias.scope [[META38]], !noalias [[META40]] -; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: store i32 [[TMP28]], ptr [[TMP33]], align 4, !alias.scope [[META42]], !noalias [[META35]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE17]] -; CHECK: [[PRED_STORE_CONTINUE17]]: +; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP31]], align 4, !alias.scope [[META38]], !noalias [[META40]] +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x i32> [[TMP22]], i32 [[TMP25]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE15]] +; CHECK: [[PRED_LOAD_CONTINUE15]]: +; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ [[TMP22]], %[[PRED_LOAD_CONTINUE13]] ], [ [[TMP32]], %[[PRED_LOAD_IF14]] ] +; CHECK-NEXT: [[TMP28:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP33]], <2 x i32> [[TMP19]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i32> [[TMP28]], i32 0 +; CHECK-NEXT: store i32 [[TMP29]], ptr [[TMP21]], align 4, !alias.scope [[META42:![0-9]+]], !noalias [[META35]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i32> [[TMP28]], i32 1 +; CHECK-NEXT: store i32 [[TMP35]], ptr [[TMP24]], align 4, !alias.scope [[META42]], !noalias [[META35]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP34]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] @@ -559,13 +473,12 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr % ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META45:![0-9]+]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: @@ -574,7 +487,7 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr % ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]] +; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20]] ; CHECK: [[PRED_STORE_IF19]]: ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP5]] ; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4, !alias.scope [[META48]], !noalias [[META50]] @@ -587,36 +500,13 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr % ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1 ; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] -; CHECK: [[PRED_STORE_IF21]]: ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0 -; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META54:![0-9]+]], !noalias [[META55:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] -; CHECK: [[PRED_STORE_CONTINUE22]]: -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] -; CHECK: [[PRED_STORE_IF23]]: ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP21]], i32 1 -; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !alias.scope [[META54]], !noalias [[META55]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]] -; CHECK: [[PRED_STORE_CONTINUE24]]: -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]] -; CHECK: [[PRED_STORE_IF25]]: -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP31]], align 4, !alias.scope [[META54]], !noalias [[META55]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]] -; CHECK: [[PRED_STORE_CONTINUE26]]: -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28]] -; CHECK: [[PRED_STORE_IF27]]: -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP35]], align 4, !alias.scope [[META54]], !noalias [[META55]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]] -; CHECK: [[PRED_STORE_CONTINUE28]]: +; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP21]], <2 x i32> [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP20]], i32 0 +; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP23]], align 4, !alias.scope [[META54:![0-9]+]], !noalias [[META55:![0-9]+]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP20]], i32 1 +; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP26]], align 4, !alias.scope [[META54]], !noalias [[META55]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP56:![0-9]+]] @@ -685,45 +575,37 @@ define void @test_stores_not_sunk_due_to_aliasing_load(ptr %dst, ptr %alias, ptr ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE7:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META58:![0-9]+]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[TMP10]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META61:![0-9]+]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 4, !alias.scope [[META63:![0-9]+]], !noalias [[META65:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META61:![0-9]+]] +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP15]], %[[PRED_LOAD_IF]] ] ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] -; CHECK: [[PRED_STORE_IF6]]: +; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7]] +; CHECK: [[PRED_LOAD_IF6]]: ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP14]], align 4, !alias.scope [[META61]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP16]], align 4, !alias.scope [[META63]], !noalias [[META65]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] -; CHECK: [[PRED_STORE_CONTINUE7]]: -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 -; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] -; CHECK: [[PRED_STORE_IF8]]: +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP20]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] +; CHECK: [[PRED_LOAD_CONTINUE7]]: +; CHECK-NEXT: [[TMP22:%.*]] = phi <2 x i32> [ [[TMP20]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF6]] ] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store i32 10, ptr [[TMP18]], align 4, !alias.scope [[META63]], !noalias [[META65]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] -; CHECK: [[PRED_STORE_CONTINUE9]]: -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 -; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]] -; CHECK: [[PRED_STORE_IF10]]: ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: store i32 10, ptr [[TMP19]], align 4, !alias.scope [[META63]], !noalias [[META65]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]] -; CHECK: [[PRED_STORE_CONTINUE11]]: +; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP22]], <2 x i32> splat (i32 10) +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0 +; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP18]], align 4, !alias.scope [[META63:![0-9]+]], !noalias [[META65:![0-9]+]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1 +; CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP19]], align 4, !alias.scope [[META63]], !noalias [[META65]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP66:![0-9]+]]