Merged
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1902,6 +1902,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,

void execute(VPTransformState &State) override;

Type *getSourceElementType() const { return IndexedTy; }

bool onlyFirstLaneUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
30 changes: 25 additions & 5 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1978,20 +1978,39 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
return TypeSwitch<const VPSingleDefRecipe *,
std::optional<std::pair<bool, unsigned>>>(R)
.Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
VPWidenSelectRecipe, VPReplicateRecipe>(
VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>(
[](auto *I) { return std::make_pair(false, I->getOpcode()); })
.Case<VPWidenIntrinsicRecipe>([](auto *I) {
return std::make_pair(true, I->getVectorIntrinsicID());
})
.Default([](auto *) { return std::nullopt; });
}

/// If recipe \p R will lower to a GEP with a non-i8 source element type,
/// return that source element type.
static Type *getGEPSourceElementType(const VPSingleDefRecipe *R) {
// All VPInstructions that lower to GEPs must have the i8 source element
// type (as they are PtrAdds), so we omit it.
return TypeSwitch<const VPSingleDefRecipe *, Type *>(R)
.Case<VPReplicateRecipe, VPWidenGEPRecipe>([](auto *I) -> Type * {
if (auto *GEP = dyn_cast<GetElementPtrInst>(I->getUnderlyingValue()))
return GEP->getSourceElementType();
return nullptr;
})
.Case<VPVectorPointerRecipe>(
[](auto *I) { return I->getSourceElementType(); })
.Default([](auto *) { return nullptr; });
}

/// Returns true if recipe \p Def can be safely handled for CSE.
static bool canHandle(const VPSingleDefRecipe *Def) {
// We can extend the list of handled recipes in the future,
// provided we account for the data embedded in them while checking for
// equality or hashing.
auto C = getOpcodeOrIntrinsicID(Def);
// equality or hashing. We assign VPVectorPointerRecipe the GEP opcode,
// as it is essentially a GEP with different semantics.
auto C = isa<VPVectorPointerRecipe>(Def)
? std::make_pair(false, Instruction::GetElementPtr)
: getOpcodeOrIntrinsicID(Def);

// The issue with (Insert|Extract)Value is that the index of the
// insert/extract is not a proper operand in LLVM IR, and hence also not in
@@ -2012,8 +2031,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
VPTypeAnalysis TypeInfo(*Plan);
hash_code Result = hash_combine(
Def->getVPDefID(), getOpcodeOrIntrinsicID(Def),
TypeInfo.inferScalarType(Def), vputils::isSingleScalar(Def),
hash_combine_range(Def->operands()));
getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
vputils::isSingleScalar(Def), hash_combine_range(Def->operands()));
if (auto *RFlags = dyn_cast<VPRecipeWithIRFlags>(Def))
if (RFlags->hasPredicate())
return hash_combine(Result, RFlags->getPredicate());
@@ -2026,6 +2045,7 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
return L == R;
if (L->getVPDefID() != R->getVPDefID() ||
getOpcodeOrIntrinsicID(L) != getOpcodeOrIntrinsicID(R) ||
getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
vputils::isSingleScalar(L) != vputils::isSingleScalar(R) ||
!equal(L->operands(), R->operands()))
return false;
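As a quick illustration of why the GEP source element type has to feed the CSE hash and equality checks above (a hypothetical IR sketch, not taken from this patch or its tests): with opaque pointers, two GEPs can have identical operands yet step by different byte amounts, so only GEPs that also agree on the source element type may be merged. VPInstruction ptradds always lower to i8 GEPs (as noted in the comment on getGEPSourceElementType), so they need no extra disambiguation.

define void @cse_sketch(ptr %p, i64 %i) {
entry:
  ; Identical operands (%p, i64 1) but different source element types:
  ; %f.next advances %p by 4 bytes, %d.next by 8, so these two GEPs must
  ; not be folded into one.
  %f.next = getelementptr inbounds float, ptr %p, i64 1
  %d.next = getelementptr inbounds double, ptr %p, i64 1
  ; A ptradd lowers to a byte-wise GEP; its source element type is always i8.
  %byte.next = getelementptr inbounds i8, ptr %p, i64 %i
  ; GEPs agreeing on operands and source element type compute the same
  ; address; the second one is the redundancy CSE can now remove.
  %g0 = getelementptr inbounds float, ptr %p, i64 %i
  %g1 = getelementptr inbounds float, ptr %p, i64 %i
  ret void
}

The test updates below show exactly this last case: the second per-part vector pointer, previously recomputed from vscale, is now reused from the address already materialized for the corresponding load.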
20 changes: 4 additions & 16 deletions llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
@@ -518,11 +518,8 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP13:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP15]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP16]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
@@ -584,11 +581,8 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
; CHECK-VF8-NEXT: [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-VF8-NEXT: [[TMP11:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
; CHECK-VF8-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 2
; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP13]]
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP10]], ptr [[TMP6]], align 4
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP14]], align 4
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
; CHECK-VF8-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
@@ -656,11 +650,8 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP13:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP15]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP16]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -719,11 +710,8 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
; CHECK-VF8-NEXT: [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-VF8-NEXT: [[TMP11:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
; CHECK-VF8-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 2
; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP13]]
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP10]], ptr [[TMP6]], align 4
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP14]], align 4
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
; CHECK-VF8-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -76,11 +76,8 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[B]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP22]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP16]], align 4
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
; CHECK: [[FOR_COND_CLEANUP]]:
@@ -216,11 +213,8 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP20:%.*]] = shl nuw i64 [[TMP19]], 2
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP16]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -296,11 +290,8 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP20:%.*]] = shl nuw i64 [[TMP19]], 2
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP16]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -378,11 +369,8 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP17]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -455,11 +443,8 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP17]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
15 changes: 3 additions & 12 deletions llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -430,11 +430,8 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) {
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD1]], splat (i32 1)
; NOSTRIDED-UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
; NOSTRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP11]]
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP8]], ptr [[TMP4]], align 4
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP12]], align 4
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP7]], align 4
; NOSTRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NOSTRIDED-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NOSTRIDED-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -557,11 +554,8 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) {
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD1]], splat (i32 1)
; NOSTRIDED-UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
; NOSTRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP11]]
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP8]], ptr [[TMP4]], align 4
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP12]], align 4
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP7]], align 4
; NOSTRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NOSTRIDED-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NOSTRIDED-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -1060,11 +1054,8 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) {
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD1]], splat (i32 1)
; NOSTRIDED-UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
; NOSTRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP11]]
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP8]], ptr [[TMP4]], align 4
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP12]], align 4
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP7]], align 4
; NOSTRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NOSTRIDED-UF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NOSTRIDED-UF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
9 changes: 3 additions & 6 deletions llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
@@ -36,13 +36,10 @@ define void @foo(ptr nocapture noalias %A, i64 %N) #0 {
; CHECK-NEXT: [[TMP8:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP9:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP10:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD4]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i32 8
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i32 16
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i32 24
; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[A]], align 4
; CHECK-NEXT: store <8 x float> [[TMP8]], ptr [[TMP11]], align 4
; CHECK-NEXT: store <8 x float> [[TMP9]], ptr [[TMP12]], align 4
; CHECK-NEXT: store <8 x float> [[TMP10]], ptr [[TMP13]], align 4
; CHECK-NEXT: store <8 x float> [[TMP8]], ptr [[TMP4]], align 4
; CHECK-NEXT: store <8 x float> [[TMP9]], ptr [[TMP5]], align 4
; CHECK-NEXT: store <8 x float> [[TMP10]], ptr [[TMP6]], align 4
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
;