Skip to content
8 changes: 4 additions & 4 deletions llvm/include/llvm/Analysis/ScopedNoAliasAA.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@ class ScopedNoAliasAAResult : public AAResultBase {
LLVM_ABI ModRefInfo getModRefInfo(const CallBase *Call1,
const CallBase *Call2, AAQueryInfo &AAQI);

LLVM_ABI void
LLVM_ABI static void
collectScopedDomains(const MDNode *NoAlias,
SmallPtrSetImpl<const MDNode *> &Domains) const;
SmallPtrSetImpl<const MDNode *> &Domains);

private:
bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
LLVM_ABI static bool mayAliasInScopes(const MDNode *Scopes,
const MDNode *NoAlias);
};

/// Analysis pass providing a never-invalidated alias analysis result.
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Analysis/ScopedNoAliasAA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ static void collectMDInDomain(const MDNode *List, const MDNode *Domain,

/// Collect the set of scoped domains relevant to the noalias scopes.
void ScopedNoAliasAAResult::collectScopedDomains(
const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) const {
const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) {
if (!NoAlias)
return;
assert(Domains.empty() && "Domains should be empty");
Expand All @@ -127,7 +127,7 @@ void ScopedNoAliasAAResult::collectScopedDomains(
}

bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
const MDNode *NoAlias) const {
const MDNode *NoAlias) {
if (!Scopes || !NoAlias)
return true;

Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
Expand Down Expand Up @@ -981,6 +982,13 @@ class VPIRMetadata {
/// Intersect this VPIRMetada object with \p MD, keeping only metadata
/// nodes that are common to both.
void intersect(const VPIRMetadata &MD);

/// Get metadata of kind \p Kind. Returns nullptr if not found.
MDNode *getMetadata(unsigned Kind) const {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it matter if there is more than one entry of kind Kind in the list? Is it worth updating the comments above the function to say something like "this returns the first instance of kind \p Kind ..."?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There must be at most one entry for each kind (to add multiple entries for the same kind would require kind-dependent merging).

auto It =
find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
return It != Metadata.end() ? It->second : nullptr;
}
};

/// This is a concrete Recipe that models a single VPlan-level instruction.
Expand Down
54 changes: 54 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,20 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
Expand Down Expand Up @@ -2401,6 +2406,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
runPass(removeDeadRecipes, Plan);

runPass(createAndOptimizeReplicateRegions, Plan);
runPass(hoistInvariantLoads, Plan);
runPass(mergeBlocksIntoPredecessors, Plan);
runPass(licm, Plan);
}
Expand Down Expand Up @@ -3914,6 +3920,54 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
}
}

void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();

// Collect candidate loads with invariant addresses and noalias scopes
// metadata and memory-writing recipes with noalias metadata.
SmallVector<std::pair<VPRecipeBase *, MemoryLocation>> CandidateLoads;
SmallVector<MemoryLocation> Stores;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(LoopRegion->getEntry()))) {
for (VPRecipeBase &R : *VPBB) {
// Only handle single-scalar replicated loads with invariant addresses.
if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
if (RepR->isPredicated() || !RepR->isSingleScalar() ||
RepR->getOpcode() != Instruction::Load)
continue;

VPValue *Addr = RepR->getOperand(0);
if (Addr->isDefinedOutsideLoopRegions()) {
MemoryLocation Loc = *vputils::getMemoryLocation(*RepR);
if (!Loc.AATags.Scope)
continue;
CandidateLoads.push_back({RepR, Loc});
}
}
if (R.mayWriteToMemory()) {
auto Loc = vputils::getMemoryLocation(R);
if (!Loc || !Loc->AATags.Scope || !Loc->AATags.NoAlias)
return;
Stores.push_back(*Loc);
}
}
}

VPBasicBlock *Preheader = Plan.getVectorPreheader();
for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
// Hoist the load to the preheader if it doesn't alias with any stores
// according to the noalias metadata. Other loads should have been hoisted
// by other passes
const AAMDNodes &LoadAA = LoadLoc.AATags;
if (all_of(Stores, [&](const MemoryLocation &StoreLoc) {
return !ScopedNoAliasAAResult::mayAliasInScopes(
LoadAA.Scope, StoreLoc.AATags.NoAlias);
})) {
LoadRecipe->moveBefore(*Preheader, Preheader->getFirstNonPhi());
}
}
}

void VPlanTransforms::materializeConstantVectorTripCount(
VPlan &Plan, ElementCount BestVF, unsigned BestUF,
PredicatedScalarEvolution &PSE) {
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,11 @@ struct VPlanTransforms {
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
static void materializeBroadcasts(VPlan &Plan);

/// Hoist single-scalar loads with invariant addresses out of the vector loop
/// to the preheader, if they are proven not to alias with any stores in the
/// plan using noalias metadata.
static void hoistInvariantLoads(VPlan &Plan);

// Materialize vector trip counts for constants early if it can simply be
// computed as (Original TC / VF * UF) * VF * UF.
static void
Expand Down
18 changes: 18 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"

using namespace llvm;
Expand Down Expand Up @@ -393,3 +394,20 @@ bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
return VPB->getNumSuccessors() == 2 &&
VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
}

std::optional<MemoryLocation>
vputils::getMemoryLocation(const VPRecipeBase &R) {
return TypeSwitch<const VPRecipeBase *, std::optional<MemoryLocation>>(&R)
.Case<VPWidenMemoryRecipe, VPInterleaveBase, VPReplicateRecipe>(
[](auto *S) {
MemoryLocation Loc;
// Populate noalias metadata from VPIRMetadata.
if (MDNode *NoAliasMD = S->getMetadata(LLVMContext::MD_noalias))
Loc.AATags.NoAlias = NoAliasMD;
if (MDNode *AliasScopeMD =
S->getMetadata(LLVMContext::MD_alias_scope))
Loc.AATags.Scope = AliasScopeMD;
return Loc;
})
.Default([](auto *) { return std::nullopt; });
}
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "llvm/Support/Compiler.h"

namespace llvm {
class MemoryLocation;
class ScalarEvolution;
class SCEV;
} // namespace llvm
Expand Down Expand Up @@ -74,6 +75,11 @@ getRecipesForUncountableExit(VPlan &Plan,
SmallVectorImpl<VPRecipeBase *> &Recipes,
SmallVectorImpl<VPRecipeBase *> &GEPs);

/// Return a MemoryLocation for \p R with noalias metadata populated from
/// \p R, if the recipe is supported and std::nullopt otherwise. The pointer of
/// the location is conservatively set to nullptr.
std::optional<MemoryLocation> getMemoryLocation(const VPRecipeBase &R);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth adding a comment saying something like returns std::nullopt if recipe is unsupported?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added, thanks!


/// Extracts and returns NoWrap and FastMath flags from the induction binop in
/// \p ID.
inline VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
; DEFAULT-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], ptr [[E:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
; DEFAULT-NEXT: [[ENTRY:.*:]]
; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 60
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 28
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; DEFAULT: [[VECTOR_MEMCHECK]]:
; DEFAULT-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[E]], i64 4
Expand Down Expand Up @@ -427,16 +427,16 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
; DEFAULT: [[VECTOR_BODY]]:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE33:.*]] ]
; DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]]
; DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META8:![0-9]+]]
; DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META11:![0-9]+]]
; DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META13:![0-9]+]]
; DEFAULT-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
; DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]]
; DEFAULT-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP5]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
; DEFAULT: [[VECTOR_BODY]]:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE33:.*]] ]
; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[D]], i64 [[INDEX]]
; DEFAULT-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; DEFAULT: [[PRED_STORE_IF]]:
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
Original file line number Diff line number Diff line change
Expand Up @@ -132,15 +132,15 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT: vector.ph:
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP0:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT]] to <16 x i8>
; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
; DEFAULT: vector.body:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6:![0-9]+]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i64> poison, i64 [[TMP1]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT2]], <16 x i64> poison, <16 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT3]] to <16 x i8>
; DEFAULT-NEXT: [[TMP0:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT]] to <16 x i8>
; DEFAULT-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], [[TMP0]]
; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
; DEFAULT: vector.body:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16
; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
Expand All @@ -156,15 +156,15 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <8 x i16> poison, i16 [[X]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT4]], <8 x i16> poison, <8 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8>
; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; DEFAULT: vec.epilog.vector.body:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This transformation seems to be taking place without any noalias metadata. I wonder if it's worth splitting the PR up into two parts - a first patch that does the optimisation without using the metadata, and a follow-on to use the metadata?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should only ever do the transformation using noalias metadata (the pointer of MemoryLocation is set to nullptr, so cannot be used to determine noalias).

The way the diff is highligthed here makes it a bit difficult to see, but we hoist [TMP8:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6]] together with some instructions used by it.

The only store in the loop ( store <8 x i8> [[TMP10]], ptr [[TMP11]], align 1, !alias.scope [[META9]], !noalias [[META6]] ) also has noalias metadata not aliasing the load

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I must be missing something because I don't see any metadata on the original scalar loop:

entry:
  br label %loop
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %x.ext = zext i16 %x to i64
  %l = load i64, ptr %src, align 8
  %and = and i64 %l, %x.ext
  %trunc = trunc i64 %and to i8
  %gep = getelementptr i8, ptr %dst, i64 %iv
  store i8 %trunc, ptr %gep, align 1
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv.next, 1000
  br i1 %ec, label %exit, label %loop
exit:
  ret void

and the function pointer arguments do not have noalias attached either.

Is it the loop vectoriser itself that is first adding the metadata, then using it to perform the transformation?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is using the noalias metadata the loop-vectorizer generates when emitting memory runtime checks; it will also work for cases where we already have !noalias in the scalar loop, but those cases are much less interesting I think, as then other transformations will do the hoisting earlier.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK that makes sense, thanks for explaining!

; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP8:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i64> poison, i64 [[TMP8]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT7]], <8 x i64> poison, <8 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP9:%.*]] = trunc <8 x i64> [[BROADCAST_SPLAT8]] to <8 x i8>
; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8>
; DEFAULT-NEXT: [[TMP10:%.*]] = and <8 x i8> [[TMP9]], [[TMP7]]
; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; DEFAULT: vec.epilog.vector.body:
; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX6]]
; DEFAULT-NEXT: store <8 x i8> [[TMP10]], ptr [[TMP11]], align 1, !alias.scope [[META9]], !noalias [[META6]]
; DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@ define void @vf_will_not_generate_any_vector_insts(ptr %src, ptr %dst) {
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4, !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[SRC]], align 4, !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP6]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT3]], <vscale x 4 x ptr> align 4 [[BROADCAST_SPLAT]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
Expand Down
Loading
Loading