386 changes: 275 additions & 111 deletions llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI8:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND9:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
Expand All @@ -46,43 +45,32 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT6]], [[TMP19]]
; CHECK-NEXT: [[VECTOR_GEP7:%.*]] = mul <vscale x 2 x i64> [[TMP20]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP7]]
; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 2
; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 2
; CHECK-NEXT: [[TMP25:%.*]] = mul i64 8, [[TMP24]]
; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP23]], 0
; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP26]], i32 0
; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT10]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP27:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP28:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT11]], [[TMP27]]
; CHECK-NEXT: [[VECTOR_GEP12:%.*]] = mul <vscale x 2 x i64> [[TMP28]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[POINTER_PHI8]], <vscale x 2 x i64> [[VECTOR_GEP12]]
; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[TMP23]], 1
; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP30]], i32 0
; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT13]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP32:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT14]], [[TMP31]]
; CHECK-NEXT: [[VECTOR_GEP15:%.*]] = mul <vscale x 2 x i64> [[TMP32]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[POINTER_PHI8]], <vscale x 2 x i64> [[VECTOR_GEP15]]
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <vscale x 2 x ptr> [[TMP29]], i32 0
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP34]], i32 0
; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP35]], align 4
; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP37:%.*]] = mul i32 [[TMP36]], 2
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i64, ptr [[TMP34]], i32 [[TMP37]]
; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP38]], align 4
; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP40]]
; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 8
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP23]]
; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 2
; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[TMP25]], 0
; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], [[TMP26]]
; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 8
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP28]]
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP29]], align 4
; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 2
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 [[TMP31]]
; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP32]], align 4
; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP34]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP13]]
; CHECK-NEXT: [[PTR_IND9]] = getelementptr i8, ptr [[POINTER_PHI8]], i64 [[TMP25]]
; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: [[CAST_CMO:%.*]] = sub i64 [[N_VEC]], 1
; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[CAST_CMO]], 8
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP42]]
; CHECK-NEXT: [[TMP36:%.*]] = mul i64 [[CAST_CMO]], 8
; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP36]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY:%.*]] ]
Expand Down
72 changes: 30 additions & 42 deletions llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
Original file line number Diff line number Diff line change
Expand Up @@ -49,50 +49,38 @@ define void @pointer_induction_used_as_vector(i8** noalias %start.1, i8* noalias
; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, i8* [[START_2:%.*]], i64 [[N_VEC]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8** [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI4:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 1
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 1, [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP5]], 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 1
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP10:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP9]]
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP10]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8*, i8** [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 1
; CHECK-NEXT: [[TMP15:%.*]] = mul i64 1, [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP13]], 0
; CHECK-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP16]], i32 0
; CHECK-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT6]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP18:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT7]], [[TMP17]]
; CHECK-NEXT: [[VECTOR_GEP8:%.*]] = mul <vscale x 2 x i64> [[TMP18]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[POINTER_PHI4]], <vscale x 2 x i64> [[VECTOR_GEP8]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[TMP19]], i64 1
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <vscale x 2 x i8**> [[TMP11]], i32 0
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8*, i8** [[TMP21]], i32 0
; CHECK-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to <vscale x 2 x i8*>*
; CHECK-NEXT: store <vscale x 2 x i8*> [[TMP20]], <vscale x 2 x i8*>* [[TMP23]], align 8
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <vscale x 2 x i8*> [[TMP19]], i32 0
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, i8* [[TMP24]], i32 0
; CHECK-NEXT: [[TMP26:%.*]] = bitcast i8* [[TMP25]] to <vscale x 2 x i8>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP26]], align 1
; CHECK-NEXT: [[TMP27:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP25]] to <vscale x 2 x i8>*
; CHECK-NEXT: store <vscale x 2 x i8> [[TMP27]], <vscale x 2 x i8>* [[TMP28]], align 1
; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP30]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8*, i8** [[POINTER_PHI]], i64 [[TMP7]]
; CHECK-NEXT: [[PTR_IND5]] = getelementptr i8, i8* [[POINTER_PHI4]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP10]]
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[TMP12]], i64 1
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to <vscale x 2 x i8*>*
; CHECK-NEXT: store <vscale x 2 x i8*> [[TMP13]], <vscale x 2 x i8*>* [[TMP15]], align 8
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 2 x i8*> [[TMP12]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[TMP16]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to <vscale x 2 x i8>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP18]], align 1
; CHECK-NEXT: [[TMP19:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP17]] to <vscale x 2 x i8>*
; CHECK-NEXT: store <vscale x 2 x i8> [[TMP19]], <vscale x 2 x i8>* [[TMP20]], align 1
; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
Expand Down
93 changes: 42 additions & 51 deletions llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -156,38 +156,32 @@ define void @widen_2ptrs_phi_unrolled(i32* noalias nocapture %dst, i32* noalias
; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[N_VEC]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[SRC]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI7:%.*]] = phi i32* [ [[DST]], [[VECTOR_PH]] ], [ [[PTR_IND8:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 3
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[POINTER_PHI]] to <vscale x 4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP8]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i32 [[TMP9]], 2
; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <vscale x 4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP13]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP15:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD15]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[POINTER_PHI7]] to <vscale x 4 x i32>*
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32>* [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP18:%.*]] = shl nuw nsw i32 [[TMP17]], 2
; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[POINTER_PHI7]], i64 [[TMP19]]
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <vscale x 4 x i32>*
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP15]], <vscale x 4 x i32>* [[TMP21]], align 4
; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i64 [[TMP22]], 3
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP23]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i64 [[TMP5]]
; CHECK-NEXT: [[PTR_IND8]] = getelementptr i32, i32* [[POINTER_PHI7]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[SRC]], i64 [[INDEX]]
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i32, i32* [[DST]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[NEXT_GEP]] to <vscale x 4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i32 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <vscale x 4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP9]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP11:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[NEXT_GEP5]] to <vscale x 4 x i32>*
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP10]], <vscale x 4 x i32>* [[TMP12]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i32 [[TMP13]], 2
; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, i32* [[NEXT_GEP5]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <vscale x 4 x i32>*
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP11]], <vscale x 4 x i32>* [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 3
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
Expand All @@ -200,8 +194,8 @@ define void @widen_2ptrs_phi_unrolled(i32* noalias nocapture %dst, i32* noalias
; CHECK-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[S_010:%.*]] = phi i32* [ [[INCDEC_PTR1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[D_09:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[S_010]], align 4
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP25]], 1
; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[S_010]], align 4
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP21]], 1
; CHECK-NEXT: store i32 [[MUL]], i32* [[D_09]], align 4
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[D_09]], i64 1
; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i32, i32* [[S_010]], i64 1
Expand Down Expand Up @@ -257,37 +251,34 @@ define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) #0 {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI4:%.*]] = phi i32** [ [[B]], [[VECTOR_PH]] ], [ [[PTR_IND5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 1
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <vscale x 2 x i64> [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 1
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32*, i32** [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[BC:%.*]] = bitcast <vscale x 2 x i32*> [[TMP7]] to <vscale x 2 x <vscale x 2 x i32>*>
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <vscale x 2 x <vscale x 2 x i32>*> [[BC]], i64 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP10]], align 8
; CHECK-NEXT: [[TMP11]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32** [[POINTER_PHI4]] to <vscale x 2 x i32*>*
; CHECK-NEXT: store <vscale x 2 x i32*> [[TMP7]], <vscale x 2 x i32*>* [[TMP12]], align 8
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 2 x <vscale x 2 x i32>*> [[BC]], i64 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP8]], align 8
; CHECK-NEXT: [[TMP9]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32** [[NEXT_GEP]] to <vscale x 2 x i32*>*
; CHECK-NEXT: store <vscale x 2 x i32*> [[TMP7]], <vscale x 2 x i32*>* [[TMP10]], align 8
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP11]], 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i64 [[TMP5]]
; CHECK-NEXT: [[PTR_IND5]] = getelementptr i32*, i32** [[POINTER_PHI4]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[TMP11]])
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[TMP9]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32** [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
Expand All @@ -303,7 +294,7 @@ define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) #0 {
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR2]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR2]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[VAR5]]
;
entry:
Expand Down