40 changes: 20 additions & 20 deletions llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP31:%.*]] = shl i64 [[TMP30]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand Down Expand Up @@ -56,8 +58,6 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, ptr [[TMP19]], i64 [[TMP26]]
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i64 [[TMP27]]
; CHECK-NEXT: store <vscale x 8 x double> [[TMP18]], ptr [[TMP29]], align 8
; CHECK-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP31:%.*]] = shl i64 [[TMP30]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP31]]
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
Expand Down Expand Up @@ -125,6 +125,8 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP34:%.*]] = shl i64 [[TMP33]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand Down Expand Up @@ -158,8 +160,6 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[TMP22]], i64 [[TMP29]]
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[TMP31]], i64 [[TMP30]]
; CHECK-NEXT: store <vscale x 8 x i64> [[TMP21]], ptr [[TMP32]], align 8
; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP34:%.*]] = shl i64 [[TMP33]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP34]]
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/LoopVectorize/AArch64/sve-vfabi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ define void @test_big_little_params(ptr readonly %a, ptr readonly %b, ptr noalia
; CHECK-SAME: (ptr readonly [[A:%.*]], ptr readonly [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 1025)
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -19,8 +21,6 @@ define void @test_big_little_params(ptr readonly %a, ptr readonly %b, ptr noalia
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @foo_vector(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]], <vscale x 4 x i8> [[WIDE_MASKED_LOAD1]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP2]], ptr [[TMP3]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 1025)
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
Expand Down Expand Up @@ -53,6 +53,8 @@ define void @test_little_big_params(ptr readonly %a, ptr readonly %b, ptr noalia
; CHECK-SAME: (ptr readonly [[A:%.*]], ptr readonly [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 1
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -64,8 +66,6 @@ define void @test_little_big_params(ptr readonly %a, ptr readonly %b, ptr noalia
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @bar_vector(<vscale x 2 x float> [[WIDE_MASKED_LOAD]], <vscale x 2 x double> [[WIDE_MASKED_LOAD1]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP2]], ptr [[TMP3]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 1
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025)
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[START_2:%.*]], i64 [[N_VEC]]
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -75,8 +77,6 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP18]], align 1
; CHECK-NEXT: [[TMP19:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: store <vscale x 2 x i8> [[TMP19]], ptr [[TMP18]], align 1
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
Expand Down Expand Up @@ -142,6 +142,8 @@ define void @pointer_induction(ptr noalias %start, i64 %N) {
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]]
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -162,8 +164,6 @@ define void @pointer_induction(ptr noalias %start, i64 %N) {
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP14]], align 1
; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: store <vscale x 2 x i8> [[TMP15]], ptr [[TMP14]], align 1
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP17]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]]
; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[N_VEC]], 3
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP26:%.*]] = shl nuw nsw i64 [[TMP25]], 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand Down Expand Up @@ -61,8 +63,6 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt
; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i64 [[TMP22]], 2
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP23]]
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP20]], ptr [[TMP24]], align 4
; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP26:%.*]] = shl nuw nsw i64 [[TMP25]], 3
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP26]]
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
Expand Down Expand Up @@ -143,6 +143,8 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 2
; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP15]], 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -162,8 +164,6 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no
; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 2
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[NEXT_GEP5]], i64 [[TMP13]]
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP11]], ptr [[TMP14]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP15]], 3
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
Expand Down Expand Up @@ -234,6 +234,8 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 {
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 3
; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 1
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -250,8 +252,6 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 {
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP10]], align 8
; CHECK-NEXT: [[TMP11]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: store <vscale x 2 x ptr> [[TMP8]], ptr [[NEXT_GEP]], align 8
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
Expand Down Expand Up @@ -310,6 +310,8 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr %
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 1
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR:%.*]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -322,8 +324,6 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr %
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <vscale x 2 x ptr> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 2 x ptr> [[TMP3]], i64 0
; CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> zeroinitializer, ptr [[TMP5]], i32 2, <vscale x 2 x i1> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; NONE-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]]
; NONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
; NONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; NONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; NONE-NEXT: br label [[VECTOR_BODY:%.*]]
; NONE: vector.body:
; NONE-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
; NONE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX1]], 0
; NONE-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP4]]
; NONE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0
; NONE-NEXT: store <vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 4
; NONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; NONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; NONE-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP8]]
; NONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
; NONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
Expand Down Expand Up @@ -71,6 +71,8 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; DATA-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
; DATA-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; DATA-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; DATA-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4
; DATA-NEXT: br label [[VECTOR_BODY:%.*]]
; DATA: vector.body:
; DATA-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -79,8 +81,6 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]]
; DATA-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0
; DATA-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; DATA-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; DATA-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4
; DATA-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP13]]
; DATA-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
; DATA-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
Expand Down Expand Up @@ -121,6 +121,8 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT4]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; DATA_NO_LANEMASK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
; DATA_NO_LANEMASK-NEXT: br label [[VECTOR_BODY:%.*]]
; DATA_NO_LANEMASK: vector.body:
; DATA_NO_LANEMASK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -134,8 +136,6 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA_NO_LANEMASK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]]
; DATA_NO_LANEMASK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
; DATA_NO_LANEMASK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT5]], ptr [[TMP14]], i32 4, <vscale x 4 x i1> [[TMP12]])
; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; DATA_NO_LANEMASK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT6]] = add i64 [[INDEX1]], [[TMP16]]
; DATA_NO_LANEMASK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]]
; DATA_NO_LANEMASK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
Expand Down Expand Up @@ -174,6 +174,8 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]])
; DATA_AND_CONTROL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
; DATA_AND_CONTROL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; DATA_AND_CONTROL-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; DATA_AND_CONTROL-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4
; DATA_AND_CONTROL-NEXT: br label [[VECTOR_BODY:%.*]]
; DATA_AND_CONTROL: vector.body:
; DATA_AND_CONTROL-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -182,8 +184,6 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA_AND_CONTROL-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]]
; DATA_AND_CONTROL-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0
; DATA_AND_CONTROL-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; DATA_AND_CONTROL-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; DATA_AND_CONTROL-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4
; DATA_AND_CONTROL-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP13]]
; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[UMAX]])
; DATA_AND_CONTROL-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
Expand Down Expand Up @@ -225,6 +225,8 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]])
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; DATA_AND_CONTROL_NO_RT_CHECK: vector.body:
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -233,8 +235,6 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP10]]
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]]
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]])
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,18 @@ define void @zext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i16>
; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 8 x i16> [[TMP8]], trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 2, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]]
; CHECK-NEXT: store <vscale x 8 x i16> [[TMP9]], ptr [[TMP10]], align 2
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP9]], align 1
; CHECK-NEXT: [[TMP10:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i16>
; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 8 x i16> [[TMP10]], trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 2, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]]
; CHECK-NEXT: store <vscale x 8 x i16> [[TMP11]], ptr [[TMP12]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
Expand Down Expand Up @@ -115,18 +115,18 @@ define void @sext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i16>
; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 8 x i16> [[TMP8]], trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 2, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]]
; CHECK-NEXT: store <vscale x 8 x i16> [[TMP9]], ptr [[TMP10]], align 2
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP9]], align 1
; CHECK-NEXT: [[TMP10:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i16>
; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 8 x i16> [[TMP10]], trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 2, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]]
; CHECK-NEXT: store <vscale x 8 x i16> [[TMP11]], ptr [[TMP12]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform , i6
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP1]])
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -22,8 +24,6 @@ define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform , i6
; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x double> @foo_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD]], i64 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP4]], ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP2]])
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
Expand Down Expand Up @@ -57,6 +57,8 @@ define void @test_uniform_smaller_scalar(ptr noalias %dst, ptr readonly %src, i3
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP1]])
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -66,8 +68,6 @@ define void @test_uniform_smaller_scalar(ptr noalias %dst, ptr readonly %src, i3
; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x double> @bar_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD]], i32 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP4]], ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP2]])
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #1 {
; WIDE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; WIDE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; WIDE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; WIDE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; WIDE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
; WIDE-NEXT: br label [[VECTOR_BODY:%.*]]
; WIDE: vector.body:
; WIDE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -25,8 +27,6 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #1 {
; WIDE-NEXT: [[TMP6:%.*]] = call <vscale x 4 x float> @foo_vector(<vscale x 4 x float> [[TMP5]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; WIDE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; WIDE-NEXT: store <vscale x 4 x float> [[TMP6]], ptr [[TMP7]], align 4
; WIDE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; WIDE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
; WIDE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; WIDE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; WIDE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
Expand Down
38 changes: 19 additions & 19 deletions llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,18 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP6]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP9]], ptr [[TMP8]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
Expand Down Expand Up @@ -85,22 +85,22 @@ define i64 @vector_add_reduce(ptr noalias nocapture %a) {
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT: [[TMP7]] = add <vscale x 2 x i64> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 8
; CHECK-NEXT: [[TMP9]] = add <vscale x 2 x i64> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP7]])
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP9]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
Expand Down
198 changes: 99 additions & 99 deletions llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll

Large diffs are not rendered by default.

48 changes: 24 additions & 24 deletions llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,23 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; OUTLOOP-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4
; OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]]
; OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
; OUTLOOP-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; OUTLOOP-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 4
; OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; OUTLOOP: vector.body:
; OUTLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
; OUTLOOP-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; OUTLOOP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP4]]
; OUTLOOP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
; OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP6]], align 2
; OUTLOOP-NEXT: [[TMP7:%.*]] = sext <vscale x 4 x i16> [[WIDE_LOAD]] to <vscale x 4 x i32>
; OUTLOOP-NEXT: [[TMP8]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP7]]
; OUTLOOP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
; OUTLOOP-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4
; OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP10]]
; OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; OUTLOOP-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0
; OUTLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP6]]
; OUTLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0
; OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP8]], align 2
; OUTLOOP-NEXT: [[TMP9:%.*]] = sext <vscale x 4 x i16> [[WIDE_LOAD]] to <vscale x 4 x i32>
; OUTLOOP-NEXT: [[TMP10]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP9]]
; OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
; OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; OUTLOOP: middle.block:
; OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP8]])
; OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]])
; OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; OUTLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; OUTLOOP: scalar.ph:
Expand Down Expand Up @@ -75,28 +75,28 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; INLOOP-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 8
; INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]]
; INLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
; INLOOP-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; INLOOP-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 8
; INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; INLOOP: vector.body:
; INLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; INLOOP-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; INLOOP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP4]]
; INLOOP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
; INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP6]], align 2
; INLOOP-NEXT: [[TMP7:%.*]] = sext <vscale x 8 x i16> [[WIDE_LOAD]] to <vscale x 8 x i32>
; INLOOP-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[TMP7]])
; INLOOP-NEXT: [[TMP9]] = add i32 [[TMP8]], [[VEC_PHI]]
; INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vscale.i32()
; INLOOP-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 8
; INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP11]]
; INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; INLOOP-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0
; INLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP6]]
; INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0
; INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP8]], align 2
; INLOOP-NEXT: [[TMP9:%.*]] = sext <vscale x 8 x i16> [[WIDE_LOAD]] to <vscale x 8 x i32>
; INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[TMP9]])
; INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
; INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
; INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; INLOOP: middle.block:
; INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; INLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; INLOOP: scalar.ph:
; INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
; INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; INLOOP-NEXT: br label [[FOR_BODY:%.*]]
; INLOOP: for.body:
; INLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
Expand All @@ -109,7 +109,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; INLOOP-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
; INLOOP-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; INLOOP: for.cond.cleanup.loopexit:
; INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
; INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; INLOOP-NEXT: br label [[FOR_COND_CLEANUP]]
; INLOOP: for.cond.cleanup:
; INLOOP-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,8 @@ define void @load_store_factor3_i64(ptr %p) {
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -276,8 +278,6 @@ define void @load_store_factor3_i64(ptr %p) {
; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP17]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> poison)
; CHECK-NEXT: [[TMP18:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER2]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP18]], <vscale x 2 x ptr> [[TMP17]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP20]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
Expand Down Expand Up @@ -362,6 +362,8 @@ define void @load_store_factor8(ptr %p) {
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand Down Expand Up @@ -406,8 +408,6 @@ define void @load_store_factor8(ptr %p) {
; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP32]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> poison)
; CHECK-NEXT: [[TMP33:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_GATHER7]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[TMP33]], <vscale x 2 x ptr> [[TMP32]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP35]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ define void @load_store(ptr %p) {
; LMUL1-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; LMUL1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; LMUL1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; LMUL1-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; LMUL1-NEXT: br label [[VECTOR_BODY:%.*]]
; LMUL1: vector.body:
; LMUL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -24,7 +25,6 @@ define void @load_store(ptr %p) {
; LMUL1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; LMUL1-NEXT: [[TMP5:%.*]] = add <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
; LMUL1-NEXT: store <vscale x 1 x i64> [[TMP5]], ptr [[TMP4]], align 8
; LMUL1-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; LMUL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; LMUL1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; LMUL1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
Expand Down Expand Up @@ -57,6 +57,8 @@ define void @load_store(ptr %p) {
; LMUL2-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; LMUL2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; LMUL2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; LMUL2-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; LMUL2-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
; LMUL2-NEXT: br label [[VECTOR_BODY:%.*]]
; LMUL2: vector.body:
; LMUL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -66,8 +68,6 @@ define void @load_store(ptr %p) {
; LMUL2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
; LMUL2-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
; LMUL2-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP6]], align 8
; LMUL2-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; LMUL2-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
; LMUL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; LMUL2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; LMUL2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
Expand Down Expand Up @@ -100,6 +100,8 @@ define void @load_store(ptr %p) {
; LMUL4-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; LMUL4-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; LMUL4-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; LMUL4-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; LMUL4-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
; LMUL4-NEXT: br label [[VECTOR_BODY:%.*]]
; LMUL4: vector.body:
; LMUL4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -109,8 +111,6 @@ define void @load_store(ptr %p) {
; LMUL4-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i64>, ptr [[TMP6]], align 8
; LMUL4-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
; LMUL4-NEXT: store <vscale x 4 x i64> [[TMP7]], ptr [[TMP6]], align 8
; LMUL4-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; LMUL4-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
; LMUL4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; LMUL4-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; LMUL4-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
Expand Down Expand Up @@ -143,6 +143,8 @@ define void @load_store(ptr %p) {
; LMUL8-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8
; LMUL8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; LMUL8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; LMUL8-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; LMUL8-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
; LMUL8-NEXT: br label [[VECTOR_BODY:%.*]]
; LMUL8: vector.body:
; LMUL8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -152,8 +154,6 @@ define void @load_store(ptr %p) {
; LMUL8-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, ptr [[TMP6]], align 8
; LMUL8-NEXT: [[TMP7:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
; LMUL8-NEXT: store <vscale x 8 x i64> [[TMP7]], ptr [[TMP6]], align 8
; LMUL8-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; LMUL8-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
; LMUL8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; LMUL8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; LMUL8-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,23 @@ define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 5, [[TMP4]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP5]], i64 5)
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP7]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
; CHECK-NEXT: [[TMP8:%.*]] = shl <vscale x 16 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP10]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 16 x i8> [[TMP8]], [[WIDE_MASKED_LOAD1]]
; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP11]], ptr [[TMP10]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]]
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP7]], i64 5)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP9]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 16 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP12]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 16 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP13]], ptr [[TMP12]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
Expand Down
44 changes: 22 additions & 22 deletions llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,35 +20,35 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) {
; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
; VLENUNK-NEXT: [[TMP5:%.*]] = add <vscale x 4 x i64> [[TMP4]], zeroinitializer
; VLENUNK-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[TMP5]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
; VLENUNK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP6]]
; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]]
; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
; VLENUNK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
; VLENUNK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i64> [[TMP6]], zeroinitializer
; VLENUNK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP7]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
; VLENUNK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
; VLENUNK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; VLENUNK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP10]]
; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
; VLENUNK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i64 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; VLENUNK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP11:%.*]] = icmp ult <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 512, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
; VLENUNK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP10]]
; VLENUNK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0
; VLENUNK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP13]], i32 4, <vscale x 4 x i1> [[TMP11]], <vscale x 4 x i32> poison)
; VLENUNK-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i1> [[TMP11]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
; VLENUNK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[WIDE_MASKED_LOAD]]
; VLENUNK-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i32> [[PREDPHI]], [[BROADCAST_SPLAT]]
; VLENUNK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP10]]
; VLENUNK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; VLENUNK-NEXT: store <vscale x 4 x i32> [[TMP15]], ptr [[TMP17]], align 4
; VLENUNK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
; VLENUNK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP13:%.*]] = icmp ult <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 512, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
; VLENUNK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP12]]
; VLENUNK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0
; VLENUNK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP15]], i32 4, <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> poison)
; VLENUNK-NEXT: [[TMP16:%.*]] = xor <vscale x 4 x i1> [[TMP13]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
; VLENUNK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[WIDE_MASKED_LOAD]]
; VLENUNK-NEXT: [[TMP17:%.*]] = add <vscale x 4 x i32> [[PREDPHI]], [[BROADCAST_SPLAT]]
; VLENUNK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP12]]
; VLENUNK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0
; VLENUNK-NEXT: store <vscale x 4 x i32> [[TMP17]], ptr [[TMP19]], align 4
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; VLENUNK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; VLENUNK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV32-NEXT: [[TMP10:%.*]] = mul i64 16, [[TMP9]]
; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP10]], i64 0
; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; RV32-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
; RV32-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2
; RV32-NEXT: br label [[VECTOR_BODY:%.*]]
; RV32: vector.body:
; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -62,8 +64,6 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV32-NEXT: [[TMP16:%.*]] = fadd <vscale x 2 x double> [[WIDE_MASKED_GATHER6]], [[TMP15]]
; RV32-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[A]], <vscale x 2 x i64> [[VEC_IND]]
; RV32-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> [[TMP16]], <vscale x 2 x ptr> [[TMP17]], i32 8, <vscale x 2 x i1> [[TMP12]]), !alias.scope !5, !noalias !7
; RV32-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
; RV32-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2
; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
; RV32-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
; RV32-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
Expand Down Expand Up @@ -130,6 +130,8 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV64-NEXT: [[TMP10:%.*]] = mul i64 16, [[TMP9]]
; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP10]], i64 0
; RV64-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; RV64-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
; RV64-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2
; RV64-NEXT: br label [[VECTOR_BODY:%.*]]
; RV64: vector.body:
; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -144,8 +146,6 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; RV64-NEXT: [[TMP16:%.*]] = fadd <vscale x 2 x double> [[WIDE_MASKED_GATHER6]], [[TMP15]]
; RV64-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[A]], <vscale x 2 x i64> [[VEC_IND]]
; RV64-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> [[TMP16]], <vscale x 2 x ptr> [[TMP17]], i32 8, <vscale x 2 x i1> [[TMP12]]), !alias.scope !5, !noalias !7
; RV64-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
; RV64-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2
; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
; RV64-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
; RV64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
Expand Down
66 changes: 33 additions & 33 deletions llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,20 @@ define void @test(ptr %p) {
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 32
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP4]], 200
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0
; CHECK-NEXT: store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP9]], align 32
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 32
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP6]], 200
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0
; CHECK-NEXT: store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP11]], align 32
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
Expand Down Expand Up @@ -142,20 +142,20 @@ define void @trivial_due_max_vscale(ptr %p) {
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 32
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP4]], 8192
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0
; CHECK-NEXT: store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP9]], align 32
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 32
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP6]], 8192
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0
; CHECK-NEXT: store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP11]], align 32
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
Expand Down Expand Up @@ -208,20 +208,20 @@ define void @no_high_lmul_or_interleave(ptr %p) {
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 32
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP4]], 1024
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0
; CHECK-NEXT: store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP9]], align 32
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 32
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP6]], 1024
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0
; CHECK-NEXT: store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP11]], align 32
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
Expand Down
48 changes: 24 additions & 24 deletions llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll

Large diffs are not rendered by default.

120 changes: 60 additions & 60 deletions llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 {
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -52,8 +54,6 @@ define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 {
; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
; SCALABLE-NEXT: [[TMP8:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; SCALABLE-NEXT: [[TMP9]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[BROADCAST_SPLAT2]]
; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
Expand Down Expand Up @@ -117,6 +117,8 @@ define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -127,8 +129,6 @@ define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0
; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; SCALABLE-NEXT: [[TMP9]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[BROADCAST_SPLAT2]]
; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
Expand Down Expand Up @@ -184,6 +184,8 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 {
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -194,8 +196,6 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 {
; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; SCALABLE-NEXT: [[TMP9]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
Expand Down Expand Up @@ -261,6 +261,8 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64
; SCALABLE-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[MINMAX_IDENT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -271,8 +273,6 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64
; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; SCALABLE-NEXT: [[TMP9]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[BROADCAST_SPLAT]]
; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
Expand Down Expand Up @@ -330,6 +330,8 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 {
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -340,8 +342,6 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 {
; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast ueq <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
; SCALABLE-NEXT: [[TMP9]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
Expand Down Expand Up @@ -428,6 +428,8 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1
; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
; SCALABLE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; SCALABLE: vector.body:
; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -444,8 +446,6 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1
; SCALABLE-NEXT: [[TMP13:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VEC_PHI]]
; SCALABLE-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
; SCALABLE-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[TMP13]], <vscale x 4 x i32> [[VEC_PHI]]
; SCALABLE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; SCALABLE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ define void @small_trip_count_min_vlen_128(ptr nocapture %a) nounwind vscale_ran
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 4, [[TMP4]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -24,8 +26,6 @@ define void @small_trip_count_min_vlen_128(ptr nocapture %a) nounwind vscale_ran
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP7]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP8]], ptr [[TMP7]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP10]]
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
Expand Down Expand Up @@ -75,6 +75,8 @@ define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_rang
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 4, [[TMP4]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -85,8 +87,6 @@ define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_rang
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP7]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP8]], ptr [[TMP7]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP10]]
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ define void @single_constant_stride_int_scaled(ptr %p) {
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP10]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -35,8 +37,6 @@ define void @single_constant_stride_int_scaled(ptr %p) {
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP13]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
; CHECK-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP14]], <vscale x 4 x ptr> [[TMP13]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
Expand Down Expand Up @@ -99,6 +99,8 @@ define void @single_constant_stride_int_iv(ptr %p) {
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 64, [[TMP8]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -107,8 +109,6 @@ define void @single_constant_stride_int_iv(ptr %p) {
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP11]], <vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
Expand Down Expand Up @@ -236,6 +236,8 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) {
; NOSTRIDED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; NOSTRIDED: vector.body:
; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -245,8 +247,6 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) {
; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
; NOSTRIDED-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP8]], ptr [[TMP7]], align 4
; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
Expand Down Expand Up @@ -320,6 +320,8 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) {
; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NOSTRIDED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[STRIDE]]
; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; NOSTRIDED: vector.body:
; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -329,8 +331,6 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) {
; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
; NOSTRIDED-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP8]], ptr [[TMP7]], align 4
; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
Expand Down Expand Up @@ -452,6 +452,8 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; NOSTRIDED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP8]]
; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NOSTRIDED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; NOSTRIDED: vector.body:
; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -463,8 +465,6 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; NOSTRIDED-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP9]]
; NOSTRIDED-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP12]], ptr [[TMP14]], align 4
; NOSTRIDED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
; NOSTRIDED-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NOSTRIDED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
Expand Down Expand Up @@ -529,6 +529,8 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; STRIDED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4
; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; STRIDED: vector.body:
; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -539,8 +541,6 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[TMP18:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P2]], <vscale x 4 x i64> [[TMP16]]
; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP18]], <vscale x 4 x ptr> [[TMP19]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)), !alias.scope !11, !noalias !8
; STRIDED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; STRIDED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4
; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]]
; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
Expand Down Expand Up @@ -601,6 +601,8 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) {
; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NOSTRIDED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[STRIDE]]
; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; NOSTRIDED: vector.body:
; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -610,8 +612,6 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) {
; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
; NOSTRIDED-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP8]], ptr [[TMP7]], align 4
; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
Expand Down Expand Up @@ -729,6 +729,8 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP10]]
; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[N_VEC]], [[STRIDE]]
; STRIDED-NEXT: [[IND_END7:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP11]]
; STRIDED-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
; STRIDED-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 4
; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
; STRIDED: vector.body:
; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
Expand Down Expand Up @@ -761,8 +763,6 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP19]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison), !alias.scope !15
; STRIDED-NEXT: [[TMP28:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP28]], <vscale x 4 x ptr> [[TMP27]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)), !alias.scope !18, !noalias !15
; STRIDED-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
; STRIDED-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 4
; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP30]]
; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP15]]
; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP23]]
Expand Down
Loading