@@ -174,12 +174,12 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 0
; CHECK-NEXT: store <16 x i16> [[TMP7]], ptr [[TMP15]], align 2
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 16
; CHECK-NEXT: store <16 x i16> [[TMP8]], ptr [[TMP16]], align 2
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 32
; CHECK-NEXT: store <16 x i16> [[TMP9]], ptr [[TMP17]], align 2
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 48
; CHECK-NEXT: store <16 x i16> [[TMP7]], ptr [[TMP15]], align 2
; CHECK-NEXT: store <16 x i16> [[TMP8]], ptr [[TMP16]], align 2
; CHECK-NEXT: store <16 x i16> [[TMP9]], ptr [[TMP17]], align 2
; CHECK-NEXT: store <16 x i16> [[TMP10]], ptr [[TMP18]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i16> [[STEP_ADD5]], [[DOTSPLAT3]]
@@ -30,8 +30,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[WIDE_LOAD1]] = load <16 x i8>, ptr [[TMP6]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[VECTOR_RECUR]], <16 x i8> [[WIDE_LOAD]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[WIDE_LOAD]], <16 x i8> [[WIDE_LOAD1]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
@@ -40,8 +40,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Y]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP13]], align 1
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 16
; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP13]], align 1
; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP14]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -129,8 +129,8 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[WIDE_LOAD5]] = load <16 x i8>, ptr [[TMP6]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[VECTOR_RECUR]], <16 x i8> [[WIDE_LOAD]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
; CHECK-NEXT: [[TMP8]] = shufflevector <16 x i8> [[WIDE_LOAD]], <16 x i8> [[WIDE_LOAD5]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
@@ -147,8 +147,8 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[Y]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i32 0
; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP21]], align 1
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i32 16
; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP21]], align 1
; CHECK-NEXT: store <16 x i8> [[TMP18]], ptr [[TMP22]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
18 changes: 9 additions & 9 deletions llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -36,12 +36,12 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01>
; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND]], ptr [[TMP1]], align 4
; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD]], ptr [[TMP2]], align 4
; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 16
; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD2]], ptr [[TMP3]], align 4
; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 24
; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND]], ptr [[TMP1]], align 4
; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD]], ptr [[TMP2]], align 4
; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD2]], ptr [[TMP3]], align 4
; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD3]], ptr [[TMP4]], align 4
; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <8 x float> [[VEC_IND]], <float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01>
@@ -211,12 +211,12 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 2.400000e+01, double 2.400000e+01, double 2.400000e+01, double 2.400000e+01>
; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 3.600000e+01, double 3.600000e+01, double 3.600000e+01, double 3.600000e+01>
; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]]
; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND]], ptr [[TMP1]], align 8
; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP1]], i64 4
; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD]], ptr [[TMP2]], align 8
; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 8
; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD2]], ptr [[TMP3]], align 8
; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP1]], i64 12
; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND]], ptr [[TMP1]], align 8
; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD]], ptr [[TMP2]], align 8
; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD2]], ptr [[TMP3]], align 8
; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD3]], ptr [[TMP4]], align 8
; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x double> [[VEC_IND]], <double 4.800000e+01, double 4.800000e+01, double 4.800000e+01, double 4.800000e+01>
@@ -377,12 +377,12 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 [[INDEX]]
; AUTO_VEC-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4
; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8
; AUTO_VEC-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x float>, ptr [[TMP3]], align 4
; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 16
; AUTO_VEC-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x float>, ptr [[TMP4]], align 4
; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 24
; AUTO_VEC-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4
; AUTO_VEC-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x float>, ptr [[TMP3]], align 4
; AUTO_VEC-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x float>, ptr [[TMP4]], align 4
; AUTO_VEC-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x float>, ptr [[TMP5]], align 4
; AUTO_VEC-NEXT: [[TMP6:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], [[WIDE_LOAD]]
; AUTO_VEC-NEXT: [[TMP7:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], [[WIDE_LOAD5]]
60 changes: 30 additions & 30 deletions llvm/test/Transforms/LoopVectorize/X86/interleaving.ll
@@ -24,12 +24,12 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
; SSE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; SSE-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; SSE-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; SSE-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC]]
; SSE-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC2]]
; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; SSE-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP7]], align 4
; SSE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 4
; SSE-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
; SSE-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC]]
; SSE-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC2]]
; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; SSE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 4
; SSE-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
; SSE-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP11]], align 4
; SSE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; SSE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; SSE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -72,18 +72,18 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
; AVX1-NEXT: [[STRIDED_VEC8:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; AVX1-NEXT: [[STRIDED_VEC9:%.*]] = shufflevector <8 x i32> [[WIDE_VEC2]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; AVX1-NEXT: [[STRIDED_VEC10:%.*]] = shufflevector <8 x i32> [[WIDE_VEC3]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; AVX1-NEXT: [[TMP11:%.*]] = add nsw <4 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC]]
; AVX1-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[STRIDED_VEC8]], [[STRIDED_VEC4]]
; AVX1-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[STRIDED_VEC9]], [[STRIDED_VEC5]]
; AVX1-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC6]]
; AVX1-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; AVX1-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP15]], align 4
; AVX1-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 4
; AVX1-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP16]], align 4
; AVX1-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 8
; AVX1-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP17]], align 4
; AVX1-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 12
; AVX1-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP18]], align 4
; AVX1-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC]]
; AVX1-NEXT: [[TMP16:%.*]] = add nsw <4 x i32> [[STRIDED_VEC8]], [[STRIDED_VEC4]]
; AVX1-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[STRIDED_VEC9]], [[STRIDED_VEC5]]
; AVX1-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC6]]
; AVX1-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; AVX1-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 4
; AVX1-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 8
; AVX1-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 12
; AVX1-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP19]], align 4
; AVX1-NEXT: store <4 x i32> [[TMP16]], ptr [[TMP21]], align 4
; AVX1-NEXT: store <4 x i32> [[TMP17]], ptr [[TMP23]], align 4
; AVX1-NEXT: store <4 x i32> [[TMP18]], ptr [[TMP25]], align 4
; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; AVX1-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; AVX1-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -126,18 +126,18 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
; AVX2-NEXT: [[STRIDED_VEC8:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; AVX2-NEXT: [[STRIDED_VEC9:%.*]] = shufflevector <16 x i32> [[WIDE_VEC2]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; AVX2-NEXT: [[STRIDED_VEC10:%.*]] = shufflevector <16 x i32> [[WIDE_VEC3]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; AVX2-NEXT: [[TMP11:%.*]] = add nsw <8 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC]]
; AVX2-NEXT: [[TMP12:%.*]] = add nsw <8 x i32> [[STRIDED_VEC8]], [[STRIDED_VEC4]]
; AVX2-NEXT: [[TMP13:%.*]] = add nsw <8 x i32> [[STRIDED_VEC9]], [[STRIDED_VEC5]]
; AVX2-NEXT: [[TMP14:%.*]] = add nsw <8 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC6]]
; AVX2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; AVX2-NEXT: store <8 x i32> [[TMP11]], ptr [[TMP15]], align 4
; AVX2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 8
; AVX2-NEXT: store <8 x i32> [[TMP12]], ptr [[TMP16]], align 4
; AVX2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 16
; AVX2-NEXT: store <8 x i32> [[TMP13]], ptr [[TMP17]], align 4
; AVX2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 24
; AVX2-NEXT: store <8 x i32> [[TMP14]], ptr [[TMP18]], align 4
; AVX2-NEXT: [[TMP15:%.*]] = add nsw <8 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC]]
; AVX2-NEXT: [[TMP16:%.*]] = add nsw <8 x i32> [[STRIDED_VEC8]], [[STRIDED_VEC4]]
; AVX2-NEXT: [[TMP17:%.*]] = add nsw <8 x i32> [[STRIDED_VEC9]], [[STRIDED_VEC5]]
; AVX2-NEXT: [[TMP18:%.*]] = add nsw <8 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC6]]
; AVX2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; AVX2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 8
; AVX2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 16
; AVX2-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 24
; AVX2-NEXT: store <8 x i32> [[TMP15]], ptr [[TMP19]], align 4
; AVX2-NEXT: store <8 x i32> [[TMP16]], ptr [[TMP21]], align 4
; AVX2-NEXT: store <8 x i32> [[TMP17]], ptr [[TMP23]], align 4
; AVX2-NEXT: store <8 x i32> [[TMP18]], ptr [[TMP25]], align 4
; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; AVX2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; AVX2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
24 changes: 12 additions & 12 deletions llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll
@@ -36,23 +36,23 @@ define void @uaddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[TMP2]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i16>, ptr [[NEXT_GEP]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 16
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x i16>, ptr [[TMP3]], align 2
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 32
; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i16>, ptr [[TMP4]], align 2
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 48
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i16>, ptr [[NEXT_GEP]], align 2
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x i16>, ptr [[TMP3]], align 2
; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i16>, ptr [[TMP4]], align 2
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <16 x i16>, ptr [[TMP5]], align 2
; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD]], <16 x i16> [[BROADCAST_SPLAT]])
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD9]], <16 x i16> [[BROADCAST_SPLAT]])
; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD10]], <16 x i16> [[BROADCAST_SPLAT]])
; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD11]], <16 x i16> [[BROADCAST_SPLAT]])
; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[NEXT_GEP5]], align 2
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[NEXT_GEP5]], i64 16
; CHECK-NEXT: store <16 x i16> [[TMP7]], ptr [[TMP10]], align 2
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[NEXT_GEP5]], i64 32
; CHECK-NEXT: store <16 x i16> [[TMP8]], ptr [[TMP11]], align 2
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[NEXT_GEP5]], i64 48
; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[NEXT_GEP5]], align 2
; CHECK-NEXT: store <16 x i16> [[TMP7]], ptr [[TMP10]], align 2
; CHECK-NEXT: store <16 x i16> [[TMP8]], ptr [[TMP11]], align 2
; CHECK-NEXT: store <16 x i16> [[TMP9]], ptr [[TMP12]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -167,23 +167,23 @@ define void @fshl(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[NEXT_GEP]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 32
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <32 x i8>, ptr [[TMP1]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 64
; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <32 x i8>, ptr [[TMP2]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 96
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[NEXT_GEP]], align 2
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <32 x i8>, ptr [[TMP1]], align 2
; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <32 x i8>, ptr [[TMP2]], align 2
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <32 x i8>, ptr [[TMP3]], align 2
; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD]], <32 x i8> [[WIDE_LOAD]], <32 x i8> [[BROADCAST_SPLAT]])
; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD9]], <32 x i8> [[WIDE_LOAD9]], <32 x i8> [[BROADCAST_SPLAT]])
; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD10]], <32 x i8> [[WIDE_LOAD10]], <32 x i8> [[BROADCAST_SPLAT]])
; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD11]], <32 x i8> [[WIDE_LOAD11]], <32 x i8> [[BROADCAST_SPLAT]])
; CHECK-NEXT: store <32 x i8> [[TMP4]], ptr [[NEXT_GEP5]], align 2
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[NEXT_GEP5]], i64 32
; CHECK-NEXT: store <32 x i8> [[TMP5]], ptr [[TMP8]], align 2
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP5]], i64 64
; CHECK-NEXT: store <32 x i8> [[TMP6]], ptr [[TMP9]], align 2
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP5]], i64 96
; CHECK-NEXT: store <32 x i8> [[TMP4]], ptr [[NEXT_GEP5]], align 2
; CHECK-NEXT: store <32 x i8> [[TMP5]], ptr [[TMP8]], align 2
; CHECK-NEXT: store <32 x i8> [[TMP6]], ptr [[TMP9]], align 2
; CHECK-NEXT: store <32 x i8> [[TMP7]], ptr [[TMP10]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -36,12 +36,12 @@ define i32 @inv_val_store_to_inv_address_with_reduction(ptr %a, i64 %n, ptr %b)
; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP1]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 16
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP2]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 32
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i32>, ptr [[TMP3]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 48
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP1]], align 8, !alias.scope !0
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP2]], align 8, !alias.scope !0
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i32>, ptr [[TMP3]], align 8, !alias.scope !0
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x i32>, ptr [[TMP4]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP5]] = add <16 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP6]] = add <16 x i32> [[VEC_PHI4]], [[WIDE_LOAD7]]
66 changes: 33 additions & 33 deletions llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll


336 changes: 168 additions & 168 deletions llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll


12 changes: 6 additions & 6 deletions llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
@@ -30,20 +30,20 @@ define void @foo(ptr addrspace(1) align 8 dereferenceable_or_null(16), ptr addrs
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[DOT12]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP5]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i64 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP6]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i64 8
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP7]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i64 12
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP5]], align 8, !alias.scope !0
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP6]], align 8, !alias.scope !0
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP7]], align 8, !alias.scope !0
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP8]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[DOT10]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x ptr addrspace(1)> [[WIDE_LOAD]], ptr addrspace(1) [[TMP9]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i64 4
; CHECK-NEXT: store <4 x ptr addrspace(1)> [[WIDE_LOAD3]], ptr addrspace(1) [[TMP10]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i64 8
; CHECK-NEXT: store <4 x ptr addrspace(1)> [[WIDE_LOAD4]], ptr addrspace(1) [[TMP11]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i64 12
; CHECK-NEXT: store <4 x ptr addrspace(1)> [[WIDE_LOAD]], ptr addrspace(1) [[TMP9]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: store <4 x ptr addrspace(1)> [[WIDE_LOAD3]], ptr addrspace(1) [[TMP10]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: store <4 x ptr addrspace(1)> [[WIDE_LOAD4]], ptr addrspace(1) [[TMP11]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: store <4 x ptr addrspace(1)> [[WIDE_LOAD5]], ptr addrspace(1) [[TMP12]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
@@ -76,8 +76,8 @@ define i32 @main(ptr %ptr) {
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP20]]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i32 [[TMP21]]
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 0
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP24]], align 4
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 4
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP24]], align 4
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP25]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
@@ -98,7 +98,7 @@ define i32 @main(ptr %ptr) {
; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
; CHECK-NEXT: [[CONV5:%.*]] = zext i8 [[DEC]] to i32
; CHECK-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP0]], [[CONV5]]
; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: for.cond4.for.inc9_crit_edge:
; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: store i32 [[INC_LCSSA]], ptr @a, align 16
8 changes: 4 additions & 4 deletions llvm/test/Transforms/LoopVectorize/X86/pr47437.ll
@@ -135,8 +135,8 @@ define void @test_muladd(ptr noalias nocapture %d1, ptr noalias nocapture readon
; SSE41-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[D1:%.*]], i64 [[TMP0]]
; SSE41-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[D1]], i64 [[TMP1]]
; SSE41-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 0
; SSE41-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP28]], align 4
; SSE41-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 4
; SSE41-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP28]], align 4
; SSE41-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP29]], align 4
; SSE41-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; SSE41-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -271,12 +271,12 @@ define void @test_muladd(ptr noalias nocapture %d1, ptr noalias nocapture readon
; AVX1-NEXT: [[TMP54:%.*]] = getelementptr inbounds i32, ptr [[D1]], i64 [[TMP2]]
; AVX1-NEXT: [[TMP55:%.*]] = getelementptr inbounds i32, ptr [[D1]], i64 [[TMP3]]
; AVX1-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, ptr [[TMP52]], i32 0
; AVX1-NEXT: store <4 x i32> [[TMP48]], ptr [[TMP56]], align 4
; AVX1-NEXT: [[TMP57:%.*]] = getelementptr inbounds i32, ptr [[TMP52]], i32 4
; AVX1-NEXT: store <4 x i32> [[TMP49]], ptr [[TMP57]], align 4
; AVX1-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, ptr [[TMP52]], i32 8
; AVX1-NEXT: store <4 x i32> [[TMP50]], ptr [[TMP58]], align 4
; AVX1-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[TMP52]], i32 12
; AVX1-NEXT: store <4 x i32> [[TMP48]], ptr [[TMP56]], align 4
; AVX1-NEXT: store <4 x i32> [[TMP49]], ptr [[TMP57]], align 4
; AVX1-NEXT: store <4 x i32> [[TMP50]], ptr [[TMP58]], align 4
; AVX1-NEXT: store <4 x i32> [[TMP51]], ptr [[TMP59]], align 4
; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; AVX1-NEXT: [[TMP60:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
12 changes: 6 additions & 6 deletions llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
@@ -64,8 +64,8 @@ define float @reduction_sum_float_fastmath(i32 %n, ptr %array) {
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[ARRAY]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i32 4
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP6]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP7]] = fadd fast <4 x float> [[VEC_PHI1]], [[WIDE_LOAD2]]
@@ -88,7 +88,7 @@ define float @reduction_sum_float_fastmath(i32 %n, ptr %array) {
; CHECK-NEXT: [[SUM_INC]] = fadd fast float [[SUM]], [[VALUE]]
; CHECK-NEXT: [[IDX_INC]] = add i32 [[IDX]], 1
; CHECK-NEXT: [[BE_COND:%.*]] = icmp ne i32 [[IDX_INC]], 4096
; CHECK-NEXT: br i1 [[BE_COND]], label [[LOOP]], label [[LOOP_EXIT_LOOPEXIT]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK-NEXT: br i1 [[BE_COND]], label [[LOOP]], label [[LOOP_EXIT_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: loop.exit.loopexit:
; CHECK-NEXT: [[SUM_INC_LCSSA:%.*]] = phi float [ [[SUM_INC]], [[LOOP]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[LOOP_EXIT]]
@@ -133,8 +133,8 @@ define float @reduction_sum_float_only_reassoc(i32 %n, ptr %array) {
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[ARRAY]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i32 4
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP6]] = fadd reassoc <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP7]] = fadd reassoc <4 x float> [[VEC_PHI1]], [[WIDE_LOAD2]]
@@ -202,8 +202,8 @@ define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, ptr %array)
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[ARRAY]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i32 4
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP6]] = fadd reassoc contract <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP7]] = fadd reassoc contract <4 x float> [[VEC_PHI1]], [[WIDE_LOAD2]]
@@ -278,8 +278,8 @@ define float @PR35538(ptr nocapture readonly %a, i32 %N) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = fcmp nnan ninf nsz oge <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP7:%.*]] = fcmp nnan ninf nsz oge <4 x float> [[WIDE_LOAD2]], [[VEC_PHI1]]
@@ -363,8 +363,8 @@ define float @PR35538_more_FMF(ptr nocapture readonly %a, i32 %N) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = fcmp nnan ninf oge <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP7:%.*]] = fcmp nnan ninf oge <4 x float> [[WIDE_LOAD2]], [[VEC_PHI1]]
@@ -21,10 +21,10 @@ define dso_local void @constTC(ptr noalias nocapture %A) optsize {
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP2]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
; CHECK-NEXT: store <2 x i32> <i32 13, i32 13>, ptr [[TMP6]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 2
; CHECK-NEXT: store <2 x i32> <i32 13, i32 13>, ptr [[TMP7]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 4
; CHECK-NEXT: store <2 x i32> <i32 13, i32 13>, ptr [[TMP6]], align 1
; CHECK-NEXT: store <2 x i32> <i32 13, i32 13>, ptr [[TMP7]], align 1
; CHECK-NEXT: store <2 x i32> <i32 13, i32 13>, ptr [[TMP8]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 6
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1800
@@ -40,7 +40,7 @@ define dso_local void @constTC(ptr noalias nocapture %A) optsize {
; CHECK-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 1800
; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -19,11 +19,13 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) {
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.2> = phi ir<33>, vp<[[FOR1_SPLICE:%.+]]>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep.ptr> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%for.1.next> = load ir<%gep.ptr>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.ptr>
; CHECK-NEXT: WIDEN ir<%for.1.next> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: EMIT vp<[[FOR1_SPLICE]]> = first-order splice ir<%for.1>, ir<%for.1.next>
; CHECK-NEXT: EMIT vp<[[FOR2_SPLICE:%.+]]> = first-order splice ir<%for.2>, vp<[[FOR1_SPLICE]]>
; CHECK-NEXT: WIDEN ir<%add> = add vp<[[FOR1_SPLICE]]>, vp<[[FOR2_SPLICE]]>
; CHECK-NEXT: WIDEN store ir<%gep.ptr>, ir<%add>
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%gep.ptr>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
@@ -71,13 +71,15 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) {
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.3> = phi ir<33>, vp<[[FOR2_SPLICE:%.+]]>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep.ptr> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%for.1.next> = load ir<%gep.ptr>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.ptr>
; CHECK-NEXT: WIDEN ir<%for.1.next> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: EMIT vp<[[FOR1_SPLICE]]> = first-order splice ir<%for.1>, ir<%for.1.next>
; CHECK-NEXT: EMIT vp<[[FOR2_SPLICE]]> = first-order splice ir<%for.2>, vp<[[FOR1_SPLICE]]>
; CHECK-NEXT: EMIT vp<[[FOR3_SPLICE:%.+]]> = first-order splice ir<%for.3>, vp<[[FOR2_SPLICE]]>
; CHECK-NEXT: WIDEN ir<%add.1> = add vp<[[FOR1_SPLICE]]>, vp<[[FOR2_SPLICE]]>
; CHECK-NEXT: WIDEN ir<%add.2> = add ir<%add.1>, vp<[[FOR3_SPLICE]]>
; CHECK-NEXT: WIDEN store ir<%gep.ptr>, ir<%add.2>
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%gep.ptr>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add.2>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
26 changes: 13 additions & 13 deletions llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -38,8 +38,8 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) {
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP5]]
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 4
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i32>, ptr [[TMP10]], align 4
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
Expand All @@ -48,8 +48,8 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) {
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP11]]
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add <4 x i32> [[WIDE_LOAD1]], [[TMP12]]
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP17]], align 4
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP17]], align 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP16]], ptr [[TMP18]], align 4
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -246,8 +246,8 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) {
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]]
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 4
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2]] = load <4 x i32>, ptr [[TMP6]], align 4
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
@@ -507,8 +507,8 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP3]]
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP4]]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 2
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 4
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 2
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP8]], align 2
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
@@ -523,8 +523,8 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP3]]
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP4]]
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, ptr [[TMP19]], i32 0
; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP17]], ptr [[TMP21]], align 8
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, ptr [[TMP19]], i32 4
; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP17]], ptr [[TMP21]], align 8
; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP18]], ptr [[TMP22]], align 8
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -1818,8 +1818,8 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) {
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP2]]
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP3]]
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 2
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 4
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 2
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP7]], align 2
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
@@ -1832,8 +1832,8 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) {
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP18]], align 4
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP18]], align 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP19]], align 4
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -2041,8 +2041,8 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP6]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP7]], i64 1
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
; UNROLL-NO-IC-NEXT: store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, ptr [[TMP18]], align 4
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 4
; UNROLL-NO-IC-NEXT: store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, ptr [[TMP18]], align 4
; UNROLL-NO-IC-NEXT: store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, ptr [[TMP19]], align 4
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = load i16, ptr [[TMP10]], align 2
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP11]], align 2
@@ -2071,8 +2071,8 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP4]]
; UNROLL-NO-IC-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i32 0
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP46]], align 4
; UNROLL-NO-IC-NEXT: [[TMP47:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i32 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP46]], align 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP43]], ptr [[TMP47]], align 4
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -2284,8 +2284,8 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP2]]
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP3]]
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 2
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 4
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 2
; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP7]], align 2
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
@@ -2300,8 +2300,8 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP16]], ptr [[TMP20]], align 4
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP16]], ptr [[TMP20]], align 4
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP17]], ptr [[TMP21]], align 4
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -2601,8 +2601,8 @@ define void @sink_dead_inst(ptr %a) {
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr i16, ptr [[A:%.*]], i16 [[TMP0]]
; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[A]], i16 [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr i16, ptr [[TMP14]], i32 0
; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP12]], ptr [[TMP16]], align 2
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[TMP14]], i32 4
; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP12]], ptr [[TMP16]], align 2
; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP13]], ptr [[TMP17]], align 2
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
@@ -3514,8 +3514,8 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) {
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[TMP0]]
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0
; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP12]], align 4
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP10]], i32 4
; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP12]], align 4
; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP13]], align 4
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
16 changes: 8 additions & 8 deletions llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -102,8 +102,8 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP4]], align 4
; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4
; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP4]], align 4
; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], ptr [[TMP5]], align 4
; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fsub fast <4 x float> [[STEP_ADD]], [[DOTSPLAT5]]
@@ -349,8 +349,8 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP4]], align 4
; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4
; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP4]], align 4
; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], ptr [[TMP5]], align 4
; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[STEP_ADD]], [[DOTSPLAT5]]
@@ -586,8 +586,8 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 {
; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4
; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4
; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4
; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], ptr [[TMP3]], align 4
; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
@@ -859,8 +859,8 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC4_INTERL2-NEXT: [[VEC_IND10:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT13:%.*]], [[VECTOR_BODY]] ]
; VEC4_INTERL2-NEXT: [[STEP_ADD11:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[DOTSPLAT9]]
; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND10]], ptr [[TMP6]], align 4
; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 4
; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND10]], ptr [[TMP6]], align 4
; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD11]], ptr [[TMP7]], align 4
; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[BROADCAST_SPLAT]]
; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[STEP_ADD11]], [[BROADCAST_SPLAT]]
@@ -869,12 +869,12 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = fadd fast <4 x float> [[TMP10]], [[TMP8]]
; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = fadd fast <4 x float> [[TMP11]], [[TMP9]]
; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
; VEC4_INTERL2-NEXT: store <4 x float> [[TMP12]], ptr [[TMP14]], align 4
; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 4
; VEC4_INTERL2-NEXT: store <4 x float> [[TMP12]], ptr [[TMP14]], align 4
; VEC4_INTERL2-NEXT: store <4 x float> [[TMP13]], ptr [[TMP15]], align 4
; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]]
; VEC4_INTERL2-NEXT: store <4 x float> [[TMP10]], ptr [[TMP16]], align 4
; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 4
; VEC4_INTERL2-NEXT: store <4 x float> [[TMP10]], ptr [[TMP16]], align 4
; VEC4_INTERL2-NEXT: store <4 x float> [[TMP11]], ptr [[TMP17]], align 4
; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float -4.000000e+00, float -4.000000e+00, float -4.000000e+00, float -4.000000e+00>
@@ -1167,8 +1167,8 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) {
; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4
; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4
; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4
; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], ptr [[TMP3]], align 4
; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
@@ -1403,8 +1403,8 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
; VEC4_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float
; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = or disjoint i64 [[INDEX]], 4
; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; VEC4_INTERL2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4
; VEC4_INTERL2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; VEC4_INTERL2-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer
; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD3]], zeroinitializer
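The hunks above, and most of those below, share one mechanical pattern: a widened load or store no longer sits directly beneath its own getelementptr. The addresses for all unrolled parts are now computed first and the memory instructions follow as a group, so each hunk only moves a load/store line past the neighbouring GEPs; no operands or values change. A minimal sketch of the new shape, using hypothetical names (%base, %part0, %part1) that are not taken from these tests:

define void @sketch(ptr %base) {
entry:
  %part0 = getelementptr inbounds float, ptr %base, i32 0
  %part1 = getelementptr inbounds float, ptr %base, i32 4  ; addresses for both parts first
  %v0 = load <4 x float>, ptr %part0, align 4              ; then the grouped wide loads
  %v1 = load <4 x float>, ptr %part1, align 4
  %sum = fadd fast <4 x float> %v0, %v1
  ret void
}

The VPlan-printing diffs later in this review make the mechanism explicit: the address becomes a separate vector-pointer recipe that runs before the widened memory recipe.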
78 changes: 39 additions & 39 deletions llvm/test/Transforms/LoopVectorize/induction.ll

Large diffs are not rendered by default.

64 changes: 32 additions & 32 deletions llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
@@ -503,27 +503,27 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[IND_END4:%.*]] = mul i8 84, [[INDUCTION_IV]]
; CHECK-NEXT: [[DOTSPLATINSERT9:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT10:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT9]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT11:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i64 0
; CHECK-NEXT: [[DOTSPLAT12:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT11]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i8> <i8 0, i8 1, i8 2, i8 3>, [[DOTSPLAT12]]
; CHECK-NEXT: [[INDUCTION13:%.*]] = add <4 x i8> [[DOTSPLAT10]], [[TMP8]]
; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT8]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i64 0
; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT10]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i8> <i8 0, i8 1, i8 2, i8 3>, [[DOTSPLAT11]]
; CHECK-NEXT: [[INDUCTION12:%.*]] = add <4 x i8> [[DOTSPLAT9]], [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 4
; CHECK-NEXT: [[DOTSPLATINSERT14:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i64 0
; CHECK-NEXT: [[DOTSPLAT15:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT14]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i64 0
; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT13]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT19:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND16:%.*]] = phi <4 x i8> [ [[INDUCTION13]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT17:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX18:%.*]] = add i64 1, [[INDEX8]]
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX18]], 0
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT18:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <4 x i8> [ [[INDUCTION12]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX17:%.*]] = add i64 1, [[INDEX7]]
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX17]], 0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
; CHECK-NEXT: store <4 x i8> [[VEC_IND16]], ptr [[TMP12]], align 1
; CHECK-NEXT: [[INDEX_NEXT19]] = add nuw i64 [[INDEX8]], 4
; CHECK-NEXT: [[VEC_IND_NEXT17]] = add <4 x i8> [[VEC_IND16]], [[DOTSPLAT15]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT19]], 84
; CHECK-NEXT: store <4 x i8> [[VEC_IND15]], ptr [[TMP12]], align 1
; CHECK-NEXT: [[INDEX_NEXT18]] = add nuw i64 [[INDEX7]], 4
; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <4 x i8> [[VEC_IND15]], [[DOTSPLAT14]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT18]], 84
; CHECK-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: br i1 true, label [[OUTER_LATCH]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -593,27 +593,27 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IND_END4:%.*]] = mul i8 84, [[INDUCTION_IV]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT9:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT10:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT9]], <2 x i8> poison, <2 x i32> zeroinitializer
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT11:%.*]] = insertelement <2 x i8> poison, i8 [[INDUCTION_IV]], i64 0
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT12:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT11]], <2 x i8> poison, <2 x i32> zeroinitializer
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP8:%.*]] = mul <2 x i8> <i8 0, i8 1>, [[DOTSPLAT12]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION13:%.*]] = add <2 x i8> [[DOTSPLAT10]], [[TMP8]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT8]], <2 x i8> poison, <2 x i32> zeroinitializer
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <2 x i8> poison, i8 [[INDUCTION_IV]], i64 0
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT10]], <2 x i8> poison, <2 x i32> zeroinitializer
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP8:%.*]] = mul <2 x i8> <i8 0, i8 1>, [[DOTSPLAT11]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION12:%.*]] = add <2 x i8> [[DOTSPLAT9]], [[TMP8]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 2
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT14:%.*]] = insertelement <2 x i8> poison, i8 [[TMP9]], i64 0
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT15:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT14]], <2 x i8> poison, <2 x i32> zeroinitializer
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <2 x i8> poison, i8 [[TMP9]], i64 0
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT13]], <2 x i8> poison, <2 x i32> zeroinitializer
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.vector.body:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT19:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND16:%.*]] = phi <2 x i8> [ [[INDUCTION13]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT17:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[OFFSET_IDX18:%.*]] = add i64 1, [[INDEX8]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX18]], 0
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT18:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND15:%.*]] = phi <2 x i8> [ [[INDUCTION12]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[OFFSET_IDX17:%.*]] = add i64 1, [[INDEX7]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX17]], 0
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP10]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <2 x i8> [[VEC_IND16]], ptr [[TMP12]], align 1
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT19]] = add nuw i64 [[INDEX8]], 2
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT17]] = add <2 x i8> [[VEC_IND16]], [[DOTSPLAT15]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT19]], 84
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <2 x i8> [[VEC_IND15]], ptr [[TMP12]], align 1
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT18]] = add nuw i64 [[INDEX7]], 2
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT16]] = add <2 x i8> [[VEC_IND15]], [[DOTSPLAT14]]
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT18]], 84
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.middle.block:
; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 true, label [[OUTER_LATCH]], label [[VEC_EPILOG_SCALAR_PH]]
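The two epilogue hunks above change nothing but the suffixes of the auto-generated value names (DOTSPLATINSERT9 becomes DOTSPLATINSERT8, INDEX8 becomes INDEX7, and so on); apparently one unnamed temporary earlier in the function went away, so the regenerated checks renumber every value after it. FileCheck tolerates this because the update scripts bind names through regex captures rather than hard-coded suffixes; a schematic pair of directives, not taken from this test:

; CHECK: [[IDX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[IDX_NEXT:%.*]], %vector.body ]
; CHECK: [[IDX_NEXT]] = add nuw i64 [[IDX]], 4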
6 changes: 3 additions & 3 deletions llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
@@ -16,12 +16,12 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 12
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP5]] = add i32 [[TMP4]], [[VEC_PHI]]
@@ -14,10 +14,10 @@ define i32 @reduction_sum(i64 %n, ptr noalias nocapture %A) {
; UF3-NEXT: [[GEP1:%.+]] = getelementptr inbounds i32, ptr %A, i64 [[IV1]]
; UF3-NEXT: [[GEP2:%.+]] = getelementptr inbounds i32, ptr %A, i64 [[IV2]]
; UF3-NEXT: [[L_GEP0:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 0
; UF3-NEXT: [[L0:%.+]] = load <4 x i32>, ptr [[L_GEP0]], align 4
; UF3-NEXT: [[L_GEP1:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 4
; UF3-NEXT: [[L1:%.+]] = load <4 x i32>, ptr [[L_GEP1]], align 4
; UF3-NEXT: [[L_GEP2:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 8
; UF3-NEXT: [[L0:%.+]] = load <4 x i32>, ptr [[L_GEP0]], align 4
; UF3-NEXT: [[L1:%.+]] = load <4 x i32>, ptr [[L_GEP1]], align 4
; UF3-NEXT: [[L2:%.+]] = load <4 x i32>, ptr [[L_GEP2]], align 4
; UF3-NEXT: [[SUM0_NEXT]] = add <4 x i32> [[SUM0]], [[L0]]
; UF3-NEXT: [[SUM1_NEXT]] = add <4 x i32> [[SUM1]], [[L1]]
@@ -50,14 +50,14 @@ define i32 @reduction_sum(i64 %n, ptr noalias nocapture %A) {
; UF5-NEXT: [[GEP3:%.+]] = getelementptr inbounds i32, ptr %A, i64 [[IV3]]
; UF5-NEXT: [[GEP4:%.+]] = getelementptr inbounds i32, ptr %A, i64 [[IV4]]
; UF5-NEXT: [[L_GEP0:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 0
; UF5-NEXT: [[L0:%.+]] = load <4 x i32>, ptr [[L_GEP0]], align 4
; UF5-NEXT: [[L_GEP1:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 4
; UF5-NEXT: [[L1:%.+]] = load <4 x i32>, ptr [[L_GEP1]], align 4
; UF5-NEXT: [[L_GEP2:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 8
; UF5-NEXT: [[L2:%.+]] = load <4 x i32>, ptr [[L_GEP2]], align 4
; UF5-NEXT: [[L_GEP3:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 12
; UF5-NEXT: [[L3:%.+]] = load <4 x i32>, ptr [[L_GEP3]], align 4
; UF5-NEXT: [[L_GEP4:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 16
; UF5-NEXT: [[L0:%.+]] = load <4 x i32>, ptr [[L_GEP0]], align 4
; UF5-NEXT: [[L1:%.+]] = load <4 x i32>, ptr [[L_GEP1]], align 4
; UF5-NEXT: [[L2:%.+]] = load <4 x i32>, ptr [[L_GEP2]], align 4
; UF5-NEXT: [[L3:%.+]] = load <4 x i32>, ptr [[L_GEP3]], align 4
; UF5-NEXT: [[L4:%.+]] = load <4 x i32>, ptr [[L_GEP4]], align 4
; UF5-NEXT: [[SUM0_NEXT]] = add <4 x i32> [[SUM0]], [[L0]]
; UF5-NEXT: [[SUM1_NEXT]] = add <4 x i32> [[SUM1]], [[L1]]
8 changes: 4 additions & 4 deletions llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
@@ -32,18 +32,18 @@ define void @add_ind64_unrolled(ptr noalias nocapture %a, ptr noalias nocapture
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP9]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[TMP10]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i64 [[TMP11]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP9]], align 8
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 2 x i64>, ptr [[TMP12]], align 8
; CHECK-NEXT: [[TMP13:%.*]] = add nsw <vscale x 2 x i64> [[WIDE_LOAD]], [[VEC_IND]]
; CHECK-NEXT: [[TMP14:%.*]] = add nsw <vscale x 2 x i64> [[WIDE_LOAD2]], [[STEP_ADD]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP13]], ptr [[TMP15]], align 8
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP16]], 1
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP15]], i64 [[TMP17]]
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP13]], ptr [[TMP15]], align 8
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP14]], ptr [[TMP18]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[STEP_ADD]], [[DOTSPLAT]]
@@ -113,16 +113,16 @@ define void @add_ind64_unrolled_nxv1i64(ptr noalias nocapture %a, ptr noalias no
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP8]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i64 [[TMP9]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP8]], align 8
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 1 x i64>, ptr [[TMP10]], align 8
; CHECK-NEXT: [[TMP11:%.*]] = add nsw <vscale x 1 x i64> [[WIDE_LOAD]], [[VEC_IND]]
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <vscale x 1 x i64> [[WIDE_LOAD2]], [[STEP_ADD]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: store <vscale x 1 x i64> [[TMP11]], ptr [[TMP13]], align 8
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP14]]
; CHECK-NEXT: store <vscale x 1 x i64> [[TMP11]], ptr [[TMP13]], align 8
; CHECK-NEXT: store <vscale x 1 x i64> [[TMP12]], ptr [[TMP15]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[STEP_ADD]], [[DOTSPLAT]]
@@ -47,18 +47,18 @@
; CHECKUF2: vector.body:
; CHECKUF2: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECKUF2: %[[IDXB:.*]] = getelementptr inbounds double, ptr %b, i64 %index
; CHECKUF2: %wide.load = load <vscale x 4 x double>, ptr %[[IDXB]], align 8
; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2: %[[VSCALE2:.*]] = shl i64 %[[VSCALE]], 2
; CHECKUF2: %[[IDXB_NEXT:.*]] = getelementptr inbounds double, ptr %[[IDXB]], i64 %[[VSCALE2]]
; CHECKUF2: %wide.load = load <vscale x 4 x double>, ptr %[[IDXB]], align 8
; CHECKUF2: %wide.load{{[0-9]+}} = load <vscale x 4 x double>, ptr %[[IDXB_NEXT]], align 8
; CHECKUF2: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
; CHECKUF2: %[[FADD_NEXT:.*]] = fadd <vscale x 4 x double> %wide.load{{[0-9]+}}, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
; CHECKUF2: %[[IDXA:.*]] = getelementptr inbounds double, ptr %a, i64 %index
; CHECKUF2: store <vscale x 4 x double> %[[FADD]], ptr %[[IDXA]], align 8
; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2: %[[VSCALE2:.*]] = shl i64 %[[VSCALE]], 2
; CHECKUF2: %[[IDXA_NEXT:.*]] = getelementptr inbounds double, ptr %[[IDXA]], i64 %[[VSCALE2]]
; CHECKUF2: store <vscale x 4 x double> %[[FADD]], ptr %[[IDXA]], align 8
; CHECKUF2: store <vscale x 4 x double> %[[FADD_NEXT]], ptr %[[IDXA_NEXT]], align 8
; CHECKUF2: %index.next = add nuw i64 %index, %[[VSCALEX8]]
; CHECKUF2: %[[CMP:.*]] = icmp eq i64 %index.next, %n.vec
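In the scalable hunks above, the second part's offset is not a compile-time constant: for <vscale x 4 x double> the element offset is vscale * 4, so the code queries llvm.vscale.i64 and shifts left by 2 before either memory access is issued. A standalone sketch of that address computation, with hypothetical names rather than the test's:

define <vscale x 4 x double> @second_part(ptr %base) {
entry:
  %vs = call i64 @llvm.vscale.i64()
  %off = shl i64 %vs, 2                                    ; vscale * 4 elements per part
  %gep = getelementptr inbounds double, ptr %base, i64 %off
  %wide = load <vscale x 4 x double>, ptr %gep, align 8
  ret <vscale x 4 x double> %wide
}
declare i64 @llvm.vscale.i64()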
@@ -11,8 +11,8 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; CHECK: %[[T2:.+]] = add nuw nsw i64 %offset.idx, %tmp0
; CHECK: %[[T3:.+]] = sub nsw i64 %[[T2]], %x
; CHECK: %[[T4:.+]] = getelementptr inbounds i32, ptr %a, i64 %[[T3]]
; CHECK: load <4 x i32>, ptr %[[T4]], align 4
; CHECK: %[[T6:.+]] = getelementptr inbounds i32, ptr %[[T4]], i64 4
; CHECK: load <4 x i32>, ptr %[[T4]], align 4
; CHECK: load <4 x i32>, ptr %[[T6]], align 4
; CHECK: br {{.*}}, label %middle.block, label %vector.body
;
@@ -30,8 +30,8 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; NO-IC: %[[T8:.+]] = getelementptr inbounds i32, ptr %a, i64 %[[T6]]
; NO-IC: %[[T9:.+]] = getelementptr inbounds i32, ptr %a, i64 %[[T7]]
; NO-IC: %[[T10:.+]] = getelementptr inbounds i32, ptr %[[T8]], i32 0
; NO-IC: load <4 x i32>, ptr %[[T10]], align 4
; NO-IC: %[[T12:.+]] = getelementptr inbounds i32, ptr %[[T8]], i32 4
; NO-IC: load <4 x i32>, ptr %[[T10]], align 4
; NO-IC: load <4 x i32>, ptr %[[T12]], align 4
; NO-IC: br {{.*}}, label %middle.block, label %vector.body
;
6 changes: 4 additions & 2 deletions llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
@@ -29,10 +29,12 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw
; CHECK-NEXT: " EMIT vp\<[[CAN_IV:%.+]]\> = CANONICAL-INDUCTION ir\<0\>, vp\<[[CAN_IV_NEXT:%.+]]\>\l" +
; CHECK-NEXT: " vp\<[[STEPS:%.+]]\> = SCALAR-STEPS vp\<[[CAN_IV]]\>, ir\<1\>\l" +
; CHECK-NEXT: " CLONE ir\<%arrayidx\> = getelementptr inbounds ir\<%y\>, vp\<[[STEPS]]\>\l" +
; CHECK-NEXT: " WIDEN ir\<%lv\> = load ir\<%arrayidx\>\l" +
; CHECK-NEXT: " vp\<[[VEC_PTR:%.+]]\> = vector-pointer ir\<%arrayidx\>\l" +
; CHECK-NEXT: " WIDEN ir\<%lv\> = load vp\<[[VEC_PTR]]\>\l" +
; CHECK-NEXT: " WIDEN-CALL ir\<%call\> = call @llvm.sqrt.f32(ir\<%lv\>) (using vector intrinsic)\l" +
; CHECK-NEXT: " CLONE ir\<%arrayidx2\> = getelementptr inbounds ir\<%x\>, vp\<[[STEPS]]\>\l" +
; CHECK-NEXT: " WIDEN store ir\<%arrayidx2\>, ir\<%call\>\l" +
; CHECK-NEXT: " vp\<[[VEC_PTR2:%.+]]\> = vector-pointer ir\<%arrayidx2\>\l" +
; CHECK-NEXT: " WIDEN store vp\<[[VEC_PTR2]]\>, ir\<%call\>\l" +
; CHECK-NEXT: " EMIT vp\<[[CAN_IV_NEXT]]\> = add nuw vp\<[[CAN_IV]]\>, vp\<[[VFxUF]]\>\l" +
; CHECK-NEXT: " EMIT branch-on-count vp\<[[CAN_IV_NEXT]]\>, vp\<[[VEC_TC]]\>\l" +
; CHECK-NEXT: "No successors\l"
3 changes: 2 additions & 1 deletion llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
@@ -17,7 +17,8 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) {
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next.p, ir<1>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep> = getelementptr inbounds ir<%dst>, vp<[[STEPS:%.+]]>
; CHECK-NEXT: WIDEN store ir<%gep>, ir<%iv>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%iv>
; CHECK-NEXT: EMIT vp<[[CAN_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_INC]]>, vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
63 changes: 42 additions & 21 deletions llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -21,10 +21,12 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN-CALL ir<%call> = call @llvm.sqrt.f32(ir<%lv>)
; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN store ir<%arrayidx2>, ir<%call>
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx2>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
@@ -70,12 +72,14 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x,
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %iv.next, 0, ir<1>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: WIDEN-GEP Inv[Var] ir<%arrayidx> = getelementptr inbounds ir<%y>, ir<%iv>
; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN ir<%cmp> = icmp eq ir<%arrayidx>, ir<%z>
; CHECK-NEXT: WIDEN-SELECT ir<%sel> = select ir<%cmp>, ir<1.000000e+01>, ir<2.000000e+01>
; CHECK-NEXT: WIDEN ir<%add> = fadd ir<%lv>, ir<%sel>
; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN store ir<%arrayidx2>, ir<%add>
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx2>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
@@ -123,7 +127,8 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>)
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
@@ -170,7 +175,8 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next>
; CHECK-NEXT: vp<[[IV:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[IV]]>
; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop)
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
@@ -241,7 +247,8 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) {
; CHECK-NEXT: EMIT vp<[[NOT:%.+]]> = not ir<%cmp>
; CHECK-NEXT: BLEND ir<%d> = ir<0>/vp<[[NOT]]> vp<[[PRED]]>/ir<%cmp>
; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%x>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN store ir<%idx>, ir<%d>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%d>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
@@ -364,9 +371,11 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%sum.07> = phi ir<0.000000e+00>, ir<%muladd>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%l.a> = load ir<%arrayidx>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%l.a> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%b>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%l.b> = load ir<%arrayidx2>
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx2>
; CHECK-NEXT: WIDEN ir<%l.b> = load vp<[[VEC_PTR2]]>
; CHECK-NEXT: EMIT vp<[[FMUL:%.+]]> = fmul nnan ninf nsz ir<%l.a>, ir<%l.b>
; CHECK-NEXT: REDUCE ir<[[MULADD:%.+]]> = ir<%sum.07> + nnan ninf nsz reduce.fadd (vp<[[FMUL]]>)
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
@@ -415,7 +424,8 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%isd> = getelementptr inbounds ir<%asd>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%lsd> = load ir<%isd>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%isd>
; CHECK-NEXT: WIDEN ir<%lsd> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN ir<%psd> = add nuw nsw ir<%lsd>, ir<23>
; CHECK-NEXT: WIDEN ir<%cmp1> = icmp slt ir<%lsd>, ir<100>
; CHECK-NEXT: WIDEN ir<%cmp2> = icmp sge ir<%lsd>, ir<200>
@@ -443,7 +453,8 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db
; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not ir<%cmp2>
; CHECK-NEXT: EMIT vp<[[SEL2:%.+]]> = select vp<[[NOT1]]>, vp<[[NOT2]]>, ir<false>
; CHECK-NEXT: BLEND ir<%ysd.0> = vp<[[PHI]]>/vp<[[OR1]]> ir<%psd>/vp<[[SEL2]]>
; CHECK-NEXT: WIDEN store ir<%isd>, ir<%ysd.0>
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%isd>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%ysd.0>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
@@ -560,7 +571,8 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) {
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%add> = add ir<%iv>, ir<%off>
; CHECK-NEXT: WIDEN store ir<%gep>, ir<0>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<0>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
@@ -605,12 +617,14 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr %
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep.y> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%lv> = load ir<%gep.y>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.y>
; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN ir<%add> = fadd nnan ir<%lv>, ir<1.000000e+00>
; CHECK-NEXT: WIDEN ir<%mul> = fmul reassoc nnan ninf nsz arcp contract afn ir<%add>, ir<2.000000e+00>
; CHECK-NEXT: WIDEN ir<%div> = fdiv reassoc nsz contract ir<%mul>, ir<2.000000e+00>
; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN store ir<%gep.x>, ir<%div>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.x>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%div>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
@@ -656,11 +670,13 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) {
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%lv> = load ir<%gep.x>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.x>
; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN ir<%div.1> = udiv exact ir<%lv>, ir<20>
; CHECK-NEXT: WIDEN ir<%div.2> = udiv ir<%lv>, ir<60>
; CHECK-NEXT: WIDEN ir<%add> = add nuw nsw ir<%div.1>, ir<%div.2>
; CHECK-NEXT: WIDEN store ir<%gep.x>, ir<%add>
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%gep.x>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
@@ -705,7 +721,8 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) {
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%ld.addr> = getelementptr inbounds ir<%src>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%ld.value> = load ir<%ld.addr>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%ld.addr>
; CHECK-NEXT: WIDEN ir<%ld.value> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN ir<%ifcond> = fcmp oeq ir<%ld.value>, ir<5.000000e+00>
; CHECK-NEXT: Successor(s): pred.call
; CHECK-EMPTY:
@@ -731,7 +748,8 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) {
; CHECK-NEXT: EMIT vp<[[NOT_COND:%.+]]> = not ir<%ifcond>
; CHECK-NEXT: BLEND ir<%st.value> = ir<%ld.value>/vp<[[NOT_COND]]> ir<%fadd>/ir<%ifcond>
; CHECK-NEXT: CLONE ir<%st.addr> = getelementptr inbounds ir<%dest>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN store ir<%st.addr>, ir<%st.value>
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%st.addr>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%st.value>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
@@ -786,11 +804,13 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) {
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%lv> = load ir<%gep.x>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.x>
; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN ir<%or.1> = or disjoint ir<%lv>, ir<1>
; CHECK-NEXT: WIDEN ir<%or.2> = or ir<%lv>, ir<3>
; CHECK-NEXT: WIDEN ir<%add> = add nuw nsw ir<%or.1>, ir<%or.2>
; CHECK-NEXT: WIDEN store ir<%gep.x>, ir<%add>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.x>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%add>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
@@ -835,7 +855,8 @@ define void @zext_nneg(ptr noalias %p, ptr noalias %p1) {
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%p>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%l> = load ir<%idx>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx>
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN-CAST ir<%zext> = zext nneg ir<%l>
; CHECK-NEXT: REPLICATE store ir<%zext>, ir<%p1>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>
@@ -974,7 +974,8 @@ define void @sinking_requires_duplication(ptr %addr) {
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%addr>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%0> = load ir<%gep>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
; CHECK-NEXT: WIDEN ir<%0> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN ir<%pred> = fcmp oeq ir<%0>, ir<0.000000e+00>
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = not ir<%pred>
; CHECK-NEXT: Successor(s): pred.store
@@ -1114,7 +1115,8 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: EMIT ir<%ptr.iv> = WIDEN-POINTER-INDUCTION ir<%start>, -1
; CHECK-NEXT: CLONE ir<%ptr.iv.next> = getelementptr inbounds ir<%ptr.iv>, ir<-1>
; CHECK-NEXT: WIDEN ir<%l> = load ir<%ptr.iv.next>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%ptr.iv.next>
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<0>
; CHECK-NEXT: EMIT vp<[[NEG:%.+]]> = not ir<%c.1>
; CHECK-NEXT: Successor(s): pred.store
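Note the (reverse) annotation in the hunk above: for the negative-stride pointer induction the recipe prints as vector-pointer (reverse), presumably computing the address for the reversed access so the lanes can be loaded consecutively and then flipped. Schematically, with placeholder names:

vp<%vptr> = vector-pointer (reverse) ir<%ptr.iv.next>
WIDEN ir<%l> = load vp<%vptr>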
@@ -49,8 +49,8 @@ define void @loop(ptr %X, ptr %Y) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x double>, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD8]], zeroinitializer
@@ -61,8 +61,8 @@ define void @loop(ptr %X, ptr %Y) {
; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP3]], <2 x double> zeroinitializer, <2 x double> [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP4]], <2 x double> zeroinitializer, <2 x double> [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]]
; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP11]], align 8
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[TMP11]], i64 2
; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP11]], align 8
; CHECK-NEXT: store <2 x double> [[TMP10]], ptr [[TMP12]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
@@ -149,20 +149,20 @@ define void @loop2(ptr %A, ptr %B, ptr %C, float %x) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope !4
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope !4
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope !4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 20, i32 20, i32 20, i32 20>
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD7]], <i32 20, i32 20, i32 20, i32 20>
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope !7
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope !7
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !alias.scope !7
; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[WIDE_LOAD8]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[WIDE_LOAD9]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !alias.scope !9, !noalias !11
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i64 4
; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !alias.scope !9, !noalias !11
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !alias.scope !9, !noalias !11
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP2]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[WIDE_LOAD10]]
; CHECK-NEXT: [[PREDPHI:%.*]] = fadd <4 x float> [[TMP6]], [[TMP10]]
@@ -50,12 +50,12 @@ define i64 @sum_2_at_with_int_conversion(ptr %A, ptr %B, i64 %N) {
; CHECK-NEXT: [[VEC_PHI16:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[START_I]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT: [[WIDE_LOAD17:%.*]] = load <2 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[START_I1_PEEL]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <2 x i64>, ptr [[TMP8]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i64 2
; CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <2 x i64>, ptr [[TMP8]], align 8
; CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i64> [[WIDE_LOAD17]], [[VEC_PHI16]]
@@ -171,16 +171,16 @@ define i64 @sum_3_at_with_int_conversion(ptr %A, ptr %B, ptr %C, i64 %N) {
; CHECK-NEXT: [[VEC_PHI28:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[START_I]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT: [[WIDE_LOAD29:%.*]] = load <2 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[START_I1_PEEL]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <2 x i64>, ptr [[TMP8]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i64 2
; CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <2 x i64>, ptr [[TMP8]], align 8
; CHECK-NEXT: [[WIDE_LOAD31:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[START_I12_PEEL]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <2 x i64>, ptr [[TMP10]], align 8
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i64 2
; CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <2 x i64>, ptr [[TMP10]], align 8
; CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <2 x i64>, ptr [[TMP11]], align 8
; CHECK-NEXT: [[TMP12:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP13:%.*]] = add <2 x i64> [[WIDE_LOAD29]], [[VEC_PHI28]]
6 changes: 3 additions & 3 deletions llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll
@@ -180,18 +180,18 @@ define void @test_runtime_trip_count(i32 %N) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 16
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x double>, ptr [[TMP1]], align 16
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP2]], align 16
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 2
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP2]], align 16
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP3]], align 16
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[WIDE_LOAD]], [[WIDE_LOAD5]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[WIDE_LOAD4]], [[WIDE_LOAD6]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDEX]]
; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[TMP6]], align 16
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 2
; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[TMP6]], align 16
; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[TMP7]], align 16
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]