Skip to content

Commit e8b430c

Browse files
committed
[VPlan] Hoist loads with invariant addresses using scoped noalias metadata.
This patch implements a transform to hoist single-scalar replicated loads with invariant addresses out of the vector loop to the preheader when scoped noalias metadata proves they cannot alias with any stores in the loop. This enables hoisting of loads we can prove do not alias any stores in the loop due to memory runtime checks added during vectorization.
1 parent 5d9d890 commit e8b430c

22 files changed

+223
-139
lines changed

llvm/include/llvm/Analysis/ScopedNoAliasAA.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,12 @@ class ScopedNoAliasAAResult : public AAResultBase {
4646
LLVM_ABI ModRefInfo getModRefInfo(const CallBase *Call1,
4747
const CallBase *Call2, AAQueryInfo &AAQI);
4848

49-
LLVM_ABI void
49+
LLVM_ABI static void
5050
collectScopedDomains(const MDNode *NoAlias,
51-
SmallPtrSetImpl<const MDNode *> &Domains) const;
51+
SmallPtrSetImpl<const MDNode *> &Domains);
5252

53-
private:
54-
bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
53+
LLVM_ABI static bool mayAliasInScopes(const MDNode *Scopes,
54+
const MDNode *NoAlias);
5555
};
5656

5757
/// Analysis pass providing a never-invalidated alias analysis result.

llvm/lib/Analysis/ScopedNoAliasAA.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
116116

117117
/// Collect the set of scoped domains relevant to the noalias scopes.
118118
void ScopedNoAliasAAResult::collectScopedDomains(
119-
const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) const {
119+
const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) {
120120
if (!NoAlias)
121121
return;
122122
assert(Domains.empty() && "Domains should be empty");
@@ -127,7 +127,7 @@ void ScopedNoAliasAAResult::collectScopedDomains(
127127
}
128128

129129
bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
130-
const MDNode *NoAlias) const {
130+
const MDNode *NoAlias) {
131131
if (!Scopes || !NoAlias)
132132
return true;
133133

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "llvm/ADT/ilist.h"
3333
#include "llvm/ADT/ilist_node.h"
3434
#include "llvm/Analysis/IVDescriptors.h"
35+
#include "llvm/Analysis/MemoryLocation.h"
3536
#include "llvm/Analysis/VectorUtils.h"
3637
#include "llvm/IR/DebugLoc.h"
3738
#include "llvm/IR/FMF.h"
@@ -965,6 +966,13 @@ class VPIRMetadata {
965966
/// Intersect this VPIRMetadata object with \p MD, keeping only metadata
966967
/// nodes that are common to both.
967968
void intersect(const VPIRMetadata &MD);
969+
970+
/// Get metadata of kind \p Kind. Returns nullptr if not found.
971+
MDNode *getMetadata(unsigned Kind) const {
972+
auto It = llvm::find_if(Metadata,
973+
[Kind](const auto &P) { return P.first == Kind; });
974+
return It != Metadata.end() ? It->second : nullptr;
975+
}
968976
};
969977

970978
/// This is a concrete Recipe that models a single VPlan-level instruction.

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,20 @@
2424
#include "llvm/ADT/APInt.h"
2525
#include "llvm/ADT/PostOrderIterator.h"
2626
#include "llvm/ADT/STLExtras.h"
27+
#include "llvm/ADT/SetOperations.h"
2728
#include "llvm/ADT/SetVector.h"
29+
#include "llvm/ADT/SmallPtrSet.h"
2830
#include "llvm/ADT/TypeSwitch.h"
2931
#include "llvm/Analysis/IVDescriptors.h"
3032
#include "llvm/Analysis/InstSimplifyFolder.h"
3133
#include "llvm/Analysis/LoopInfo.h"
34+
#include "llvm/Analysis/MemoryLocation.h"
3235
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
36+
#include "llvm/Analysis/ScopedNoAliasAA.h"
3337
#include "llvm/Analysis/VectorUtils.h"
3438
#include "llvm/IR/Intrinsics.h"
3539
#include "llvm/IR/MDBuilder.h"
40+
#include "llvm/IR/Metadata.h"
3641
#include "llvm/Support/Casting.h"
3742
#include "llvm/Support/TypeSize.h"
3843
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
@@ -2330,6 +2335,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
23302335
runPass(removeDeadRecipes, Plan);
23312336

23322337
runPass(createAndOptimizeReplicateRegions, Plan);
2338+
runPass(hoistInvariantLoads, Plan);
23332339
runPass(mergeBlocksIntoPredecessors, Plan);
23342340
runPass(licm, Plan);
23352341
}
@@ -3843,6 +3849,57 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
38433849
}
38443850
}
38453851

3852+
void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
3853+
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
3854+
3855+
// Collect candidate loads with invariant addresses and noalias scopes
3856+
// metadata and memory-writing recipes with noalias metadata.
3857+
SmallVector<std::pair<VPRecipeBase *, MemoryLocation>> CandidateLoads;
3858+
SmallVector<MemoryLocation> Stores;
3859+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
3860+
vp_depth_first_shallow(LoopRegion))) {
3861+
if (!VPBB->getParent())
3862+
break;
3863+
3864+
for (VPRecipeBase &R : *VPBB) {
3865+
// Only handle single-scalar replicated loads with invariant addresses.
3866+
if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
3867+
if (RepR->isPredicated() || !RepR->isSingleScalar() ||
3868+
RepR->getOpcode() != Instruction::Load)
3869+
continue;
3870+
3871+
VPValue *Addr = RepR->getOperand(0);
3872+
if (Addr->isDefinedOutsideLoopRegions()) {
3873+
MemoryLocation Loc = *vputils::getMemoryLocation(*RepR);
3874+
if (!Loc.AATags.Scope)
3875+
continue;
3876+
CandidateLoads.push_back({RepR, Loc});
3877+
}
3878+
}
3879+
if (R.mayWriteToMemory()) {
3880+
auto Loc = vputils::getMemoryLocation(R);
3881+
if (!Loc || !Loc->AATags.Scope || !Loc->AATags.NoAlias)
3882+
return;
3883+
Stores.push_back(*Loc);
3884+
}
3885+
}
3886+
}
3887+
3888+
VPBasicBlock *Preheader = Plan.getVectorPreheader();
3889+
for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
3890+
// Hoist the load to the preheader if it doesn't alias with any stores
3891+
// according to the noalias metadata. Other loads should have been hoisted
3892+
// by other passes.
3893+
const AAMDNodes &LoadAA = LoadLoc.AATags;
3894+
if (all_of(Stores, [&](const MemoryLocation &StoreLoc) {
3895+
return !ScopedNoAliasAAResult::mayAliasInScopes(
3896+
LoadAA.Scope, StoreLoc.AATags.NoAlias);
3897+
})) {
3898+
LoadRecipe->moveBefore(*Preheader, Preheader->getFirstNonPhi());
3899+
}
3900+
}
3901+
}
3902+
38463903
void VPlanTransforms::materializeConstantVectorTripCount(
38473904
VPlan &Plan, ElementCount BestVF, unsigned BestUF,
38483905
PredicatedScalarEvolution &PSE) {

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,11 @@ struct VPlanTransforms {
307307
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
308308
static void materializeBroadcasts(VPlan &Plan);
309309

310+
/// Hoist single-scalar loads with invariant addresses out of the vector loop
311+
/// to the preheader, if they are proven not to alias with any stores in the
312+
/// plan using noalias metadata.
313+
static void hoistInvariantLoads(VPlan &Plan);
314+
310315
// Materialize vector trip counts for constants early if it can simply be
311316
// computed as (Original TC / VF * UF) * VF * UF.
312317
static void

llvm/lib/Transforms/Vectorize/VPlanUtils.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "VPlanDominatorTree.h"
1212
#include "VPlanPatternMatch.h"
1313
#include "llvm/ADT/TypeSwitch.h"
14+
#include "llvm/Analysis/MemoryLocation.h"
1415
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
1516

1617
using namespace llvm;
@@ -376,3 +377,20 @@ bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
376377
return VPB->getNumSuccessors() == 2 &&
377378
VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
378379
}
380+
381+
std::optional<MemoryLocation>
382+
vputils::getMemoryLocation(const VPRecipeBase &R) {
383+
return TypeSwitch<const VPRecipeBase *, std::optional<MemoryLocation>>(&R)
384+
.Case<VPWidenStoreRecipe, VPInterleaveBase, VPReplicateRecipe>(
385+
[](auto *S) {
386+
MemoryLocation Loc;
387+
// Populate noalias metadata from VPIRMetadata.
388+
if (MDNode *NoAliasMD = S->getMetadata(LLVMContext::MD_noalias))
389+
Loc.AATags.NoAlias = NoAliasMD;
390+
if (MDNode *AliasScopeMD =
391+
S->getMetadata(LLVMContext::MD_alias_scope))
392+
Loc.AATags.Scope = AliasScopeMD;
393+
return Loc;
394+
})
395+
.Default([](auto *) { return std::nullopt; });
396+
}

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "VPlan.h"
1313

1414
namespace llvm {
15+
class MemoryLocation;
1516
class ScalarEvolution;
1617
class SCEV;
1718
} // namespace llvm
@@ -71,6 +72,10 @@ std::optional<VPValue *>
7172
getRecipesForUncountableExit(VPlan &Plan,
7273
SmallVectorImpl<VPRecipeBase *> &Recipes,
7374
SmallVectorImpl<VPRecipeBase *> &GEPs);
75+
76+
/// Return a MemoryLocation for \p R with noalias metadata populated from
77+
/// \p R. The pointer of the location is conservatively set to nullptr.
78+
std::optional<MemoryLocation> getMemoryLocation(const VPRecipeBase &R);
7479
} // namespace vputils
7580

7681
//===----------------------------------------------------------------------===//

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
386386
; DEFAULT-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], ptr [[E:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
387387
; DEFAULT-NEXT: [[ENTRY:.*:]]
388388
; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
389-
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 60
389+
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 28
390390
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
391391
; DEFAULT: [[VECTOR_MEMCHECK]]:
392392
; DEFAULT-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[E]], i64 4
@@ -427,20 +427,20 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
427427
; DEFAULT: [[VECTOR_PH]]:
428428
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
429429
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
430-
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
431-
; DEFAULT: [[VECTOR_BODY]]:
432-
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE37:.*]] ]
433-
; DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]]
430+
; DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META8:![0-9]+]]
434431
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i64 0
435432
; DEFAULT-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT28]], <4 x i32> poison, <4 x i32> zeroinitializer
436433
; DEFAULT-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META11:![0-9]+]]
437434
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP19]], i64 0
438435
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
439-
; DEFAULT-NEXT: [[TMP6:%.*]] = or <4 x i32> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT29]]
440-
; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]]
436+
; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META13:![0-9]+]]
441437
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
442438
; DEFAULT-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT30]], <4 x i32> poison, <4 x i32> zeroinitializer
443-
; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt <4 x i32> [[BROADCAST_SPLAT31]], [[TMP6]]
439+
; DEFAULT-NEXT: [[TMP6:%.*]] = or <4 x i32> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT31]]
440+
; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt <4 x i32> [[BROADCAST_SPLAT29]], [[TMP6]]
441+
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
442+
; DEFAULT: [[VECTOR_BODY]]:
443+
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE37:.*]] ]
444444
; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[D]], i64 [[INDEX]]
445445
; DEFAULT-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
446446
; DEFAULT-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]

llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,15 +132,15 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
132132
; DEFAULT: vector.ph:
133133
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0
134134
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
135-
; DEFAULT-NEXT: [[TMP0:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT]] to <16 x i8>
136-
; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
137-
; DEFAULT: vector.body:
138-
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
139135
; DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6:![0-9]+]]
140136
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i64> poison, i64 [[TMP1]], i64 0
141137
; DEFAULT-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT2]], <16 x i64> poison, <16 x i32> zeroinitializer
142138
; DEFAULT-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT3]] to <16 x i8>
139+
; DEFAULT-NEXT: [[TMP0:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT]] to <16 x i8>
143140
; DEFAULT-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], [[TMP0]]
141+
; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
142+
; DEFAULT: vector.body:
143+
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
144144
; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
145145
; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16
146146
; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
@@ -156,15 +156,15 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
156156
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
157157
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <8 x i16> poison, i16 [[X]], i64 0
158158
; DEFAULT-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT4]], <8 x i16> poison, <8 x i32> zeroinitializer
159-
; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8>
160-
; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
161-
; DEFAULT: vec.epilog.vector.body:
162-
; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
163159
; DEFAULT-NEXT: [[TMP8:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6]]
164160
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i64> poison, i64 [[TMP8]], i64 0
165161
; DEFAULT-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT7]], <8 x i64> poison, <8 x i32> zeroinitializer
166162
; DEFAULT-NEXT: [[TMP9:%.*]] = trunc <8 x i64> [[BROADCAST_SPLAT8]] to <8 x i8>
163+
; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8>
167164
; DEFAULT-NEXT: [[TMP10:%.*]] = and <8 x i8> [[TMP9]], [[TMP7]]
165+
; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
166+
; DEFAULT: vec.epilog.vector.body:
167+
; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
168168
; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX6]]
169169
; DEFAULT-NEXT: store <8 x i8> [[TMP10]], ptr [[TMP11]], align 1, !alias.scope [[META9]], !noalias [[META6]]
170170
; DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 8

llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,15 @@ define void @vf_will_not_generate_any_vector_insts(ptr %src, ptr %dst) {
1717
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1818
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1919
; CHECK: [[VECTOR_PH]]:
20+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4, !alias.scope [[META0:![0-9]+]]
21+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP0]], i64 0
22+
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2023
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST]], i64 0
2124
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
2225
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2326
; CHECK: [[VECTOR_BODY]]:
2427
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
2528
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
26-
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[SRC]], align 4, !alias.scope [[META0:![0-9]+]]
27-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP6]], i64 0
28-
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2929
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT3]], <vscale x 4 x ptr> align 4 [[BROADCAST_SPLAT]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
3030
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
3131
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]

0 commit comments

Comments
 (0)