45 changes: 45 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1372,6 +1372,50 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan) {
}
}

static VPSingleDefRecipe *findHeaderMask(VPlan &Plan);
Review comment (Contributor): ?


/// Convert scatters with a uniform address that are either unmasked or
/// masked by the header mask into an extract-last-element + scalar store.
// TODO: Add a profitability check comparing the cost of a scatter vs.
// extract + scalar store.
static void optimizeScatterWithUniformAddr(VPlan &Plan) {
Review comment (Contributor): Is there any reason this cannot be handled in narrowToSingleScalar, where we already have similar transforms?

VPValue *HeaderMask = findHeaderMask(Plan);
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_deep(Plan.getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {

// Only transform store recipes.
if (!isa<VPWidenStoreRecipe, VPWidenStoreEVLRecipe>(&R))
Review comment (Contributor): I think we can only handle VPWidenStoreRecipe until @fhahn's LastActiveLane patch lands.

continue;

auto *StoreR = cast<VPWidenMemoryRecipe>(&R);
if (StoreR->isConsecutive() ||
!vputils::isSingleScalar(StoreR->getAddr()))
continue;

assert(!StoreR->isReverse() &&
"Non-consecutive memory recipes shouldn't be reversed");
VPValue *Mask = StoreR->getMask();

// Only convert the scatter to a scalar store if it is unmasked or masked
// by the header mask, which guarantees that at least one lane is active.
if (Mask && Mask != HeaderMask)
continue;
Review comment (Contributor) on lines +1398 to +1403:

Suggested change:
-VPValue *Mask = StoreR->getMask();
-// Only convert the scatter to a scalar store if it is unmasked or masked
-// by the header mask, which guarantees that at least one lane is active.
-if (Mask && Mask != HeaderMask)
-  continue;
+// Only convert the scatter to a scalar store if it is unmasked.
+// TODO: Support header mask.
+if (StoreR->isMasked())
+  continue;
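For context on this suggestion: VPInstruction::ExtractLastElement reads the physical last lane of a vector, but under tail folding only the first EVL lanes are active, so the last physical lane need not hold the last active value; hence the TODO here and the LastActiveLane dependency mentioned above. A minimal sketch of the hazard, with assumed VF and EVL values (not taken from this patch):

; Assume VF = 4 but EVL = 3 on this iteration: lanes 0..2 are active, lane 3 is stale.
; %v = <a, b, c, poison>
%last = extractelement <4 x i32> %v, i32 3   ; reads the inactive lane 3
store i32 %last, ptr %p, align 4             ; the wanted value is in lane EVL-1 = 2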


auto *Extract = new VPInstruction(VPInstruction::ExtractLastElement,
{StoreR->getOperand(1)});
Extract->insertBefore(StoreR);

// TODO: Sink the scalar store recipe to middle block if possible.
auto *ScalarStore = new VPReplicateRecipe(
&StoreR->getIngredient(), {Extract, StoreR->getAddr()},
true /*IsSingleScalar*/, nullptr /*Mask*/, *StoreR /*Metadata*/);
ScalarStore->insertBefore(StoreR);
StoreR->eraseFromParent();
}
}
}

static void narrowToSingleScalarRecipes(VPlan &Plan) {
if (Plan.hasScalarVFOnly())
return;
Expand Down Expand Up @@ -2320,6 +2364,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
runPass(removeDeadRecipes, Plan);
runPass(simplifyBlends, Plan);
runPass(legalizeAndOptimizeInductions, Plan);
runPass(optimizeScatterWithUniformAddr, Plan);
runPass(narrowToSingleScalarRecipes, Plan);
runPass(removeRedundantExpandSCEVRecipes, Plan);
runPass(simplifyRecipes, Plan);
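For reference, a minimal before/after sketch of the rewrite optimizeScatterWithUniformAddr performs on an unmasked scatter whose address is a uniform splat (illustrative IR; the names, types, and fixed VF are assumptions, not taken from this patch's tests):

; Before: every lane stores to the same pointer %p.
%addr.ins = insertelement <4 x ptr> poison, ptr %p, i64 0
%addr.splat = shufflevector <4 x ptr> %addr.ins, <4 x ptr> poison, <4 x i32> zeroinitializer
call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %v, <4 x ptr> %addr.splat, i32 4, <4 x i1> splat (i1 true))

; After: scatter lanes commit in ascending order, so only the last lane's value
; survives at %p; extract it and emit a single scalar store.
%last = extractelement <4 x i32> %v, i32 3
store i32 %last, ptr %p, align 4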
27 changes: 15 additions & 12 deletions llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll
@@ -31,17 +31,19 @@ define void @predicated_uniform_load(ptr %src, i32 %n, ptr %dst, i1 %cond) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[COND:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = xor <vscale x 4 x i1> [[BROADCAST_SPLAT1]], splat (i1 true)
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[BOXES]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[NBRBOXES]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[BOXES]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[AVL:%.*]] = phi i32 [ [[TMP3]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 4, i1 true)
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 [[BROADCAST_SPLAT]], <vscale x 4 x i1> [[TMP13]], i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 [[BROADCAST_SPLAT4]], <vscale x 4 x i1> [[TMP13]], i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[BROADCAST_SPLAT1]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[WIDE_MASKED_GATHER]]
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[PREDPHI]], <vscale x 4 x ptr> align 4 [[BROADCAST_SPLAT4]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
+; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP12]], 4
+; CHECK-NEXT: [[TMP14:%.*]] = sub i32 [[TMP18]], 1
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i32> [[PREDPHI]], i32 [[TMP14]]
+; CHECK-NEXT: store i32 [[TMP15]], ptr [[NBRBOXES]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP10]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -63,7 +65,7 @@ define void @predicated_uniform_load(ptr %src, i32 %n, ptr %dst, i1 %cond) {
; CHECK-NEXT: store i32 [[STORE]], ptr [[NBRBOXES]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i32 [[IV]], [[IBOX]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
Expand Down Expand Up @@ -114,7 +116,7 @@ define void @predicated_strided_store(ptr %start) {
; RVA23-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]]
; RVA23-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; RVA23-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; RVA23-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; RVA23-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; RVA23: middle.block:
; RVA23-NEXT: br label [[LOOP:%.*]]
; RVA23: exit:
@@ -141,7 +143,7 @@ define void @predicated_strided_store(ptr %start) {
; RVA23ZVL1024B-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]]
; RVA23ZVL1024B-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; RVA23ZVL1024B-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; RVA23ZVL1024B-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; RVA23ZVL1024B-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; RVA23ZVL1024B: middle.block:
; RVA23ZVL1024B-NEXT: br label [[LOOP:%.*]]
; RVA23ZVL1024B: exit:
Expand Down Expand Up @@ -188,13 +190,14 @@ define void @store_to_addr_generated_from_invariant_addr(ptr noalias %p0, ptr no
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P3:%.*]], <vscale x 2 x i64> [[BROADCAST_SPLAT2]]
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> align 4 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> zeroinitializer, <vscale x 2 x ptr> align 4 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> zeroinitializer, <vscale x 2 x ptr> align 1 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 2 x ptr> [[TMP7]], i32 0
+; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4
+; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4
+; CHECK-NEXT: store i8 0, ptr [[TMP8]], align 1
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP4]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: exit:
Expand Down
8 changes: 5 additions & 3 deletions llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll
@@ -126,8 +126,6 @@ define void @test_3_inductions(ptr noalias %dst, ptr noalias %src, i64 %n) #1 {
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[DST]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
; CHECK-NEXT: [[TMP2:%.*]] = mul <vscale x 2 x i32> [[TMP1]], splat (i32 2)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> splat (i32 1), [[TMP2]]
@@ -144,7 +142,11 @@ define void @test_3_inductions(ptr noalias %dst, ptr noalias %src, i64 %n) #1 {
; CHECK-NEXT: [[TMP5:%.*]] = or <vscale x 2 x i32> [[VEC_IND2]], [[VEC_IND]]
; CHECK-NEXT: [[TMP6:%.*]] = sext <vscale x 2 x i32> [[TMP5]] to <vscale x 2 x i64>
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], <vscale x 2 x i64> [[TMP6]]
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> [[TMP7]], <vscale x 2 x ptr> align 8 [[BROADCAST_SPLAT]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP3]])
+; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP13:%.*]] = mul nuw i32 [[TMP12]], 2
+; CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP13]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <vscale x 2 x ptr> [[TMP7]], i32 [[TMP10]]
+; CHECK-NEXT: store ptr [[TMP11]], ptr [[DST]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT4]]
Expand Down
57 changes: 37 additions & 20 deletions llvm/test/Transforms/LoopVectorize/RISCV/pr154103.ll
@@ -6,29 +6,46 @@
define void @pr154103(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d) {
; CHECK-LABEL: define void @pr154103(
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], ptr noalias [[D:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: [[X:%.*]] = load i8, ptr [[GEP]], align 1
-; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[X]] to i64
-; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 0, [[CONV]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[DIV]], 0
-; CHECK-NEXT: br i1 [[CMP]], label %[[THEN:.*]], label %[[LATCH]]
-; CHECK: [[THEN]]:
-; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[B]], align 1
-; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[Y]] to i64
-; CHECK-NEXT: [[NOT:%.*]] = xor i64 [[ZEXT]], 0
-; CHECK-NEXT: br label %[[LATCH]]
-; CHECK: [[LATCH]]:
-; CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[NOT]], %[[THEN]] ], [ 0, %[[LOOP]] ]
-; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[COND]] to i16
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[B]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul <vscale x 4 x i64> [[TMP0]], splat (i64 7)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> splat (i64 1), [[TMP1]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ -7905747460161236406, %[[VECTOR_PH]] ], [ [[IV:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = mul i64 7, [[TMP3]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP4]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[A]], <vscale x 4 x i64> [[VEC_IND]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 [[TMP5]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = zext <vscale x 4 x i8> [[WIDE_MASKED_GATHER]] to <vscale x 4 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i64> [[TMP6]], <vscale x 4 x i64> splat (i64 1), i32 [[TMP2]])
+; CHECK-NEXT: [[TMP8:%.*]] = sdiv <vscale x 4 x i64> zeroinitializer, [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt <vscale x 4 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 [[BROADCAST_SPLAT]], <vscale x 4 x i1> [[TMP9]], i32 [[TMP2]])
+; CHECK-NEXT: [[TMP10:%.*]] = zext <vscale x 4 x i8> [[WIDE_MASKED_GATHER3]] to <vscale x 4 x i64>
+; CHECK-NEXT: [[TMP11:%.*]] = xor <vscale x 4 x i64> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i64> [[TMP11]], <vscale x 4 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = trunc <vscale x 4 x i64> [[PREDPHI]] to <vscale x 4 x i16>
+; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP14:%.*]] = mul nuw i32 [[TMP13]], 4
+; CHECK-NEXT: [[TMP15:%.*]] = sub i32 [[TMP14]], 1
+; CHECK-NEXT: [[TRUNC:%.*]] = extractelement <vscale x 4 x i16> [[TMP12]], i32 [[TMP15]]
; CHECK-NEXT: store i16 [[TRUNC]], ptr [[C]], align 2
; CHECK-NEXT: store i32 0, ptr [[D]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 7
+; CHECK-NEXT: [[IV]] = sub nuw i64 [[AVL]], [[TMP3]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV]], 0
-; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK-NEXT: br i1 [[DONE]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;