158 changes: 78 additions & 80 deletions llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll

Large diffs are not rendered by default.

65 changes: 31 additions & 34 deletions llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,54 +8,51 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) {
; CHECK-LABEL: @inv_load_conditional(
; CHECK-NEXT: iter.check:
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1
; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 8
; CHECK-NEXT: [[SMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX4]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8*
; CHECK-NEXT: [[A3:%.*]] = bitcast i32* [[A:%.*]] to i8*
; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1
; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX2]]
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A4]], i64 1
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX]]
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A3]], i64 1
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[A]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[MIN_ITERS_CHECK5:%.*]] = icmp ult i64 [[SMAX]], 16
; CHECK-NEXT: [[MIN_ITERS_CHECK5:%.*]] = icmp ult i64 [[SMAX4]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK5]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX4]], 9223372036854775792
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32*> poison, i32* [[A]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT]], <16 x i32*> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32*> [[BROADCAST_SPLAT]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP2]] to <16 x i32>*
; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT7]], <16 x i32>* [[TMP4]], align 4, !alias.scope !0, !noalias !3
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <16 x i32*> [[BROADCAST_SPLAT]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>*
; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT7]], <16 x i32>* [[TMP2]], align 4, !alias.scope !0, !noalias !3
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[BROADCAST_SPLAT]], i32 4, <16 x i1> [[TMP3]], <16 x i32> undef), !alias.scope !3
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP3]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1>
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[PREDPHI]], i32 15
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[BROADCAST_SPLAT]], i32 4, <16 x i1> [[TMP1]], <16 x i32> undef), !alias.scope !3
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1>
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX4]], [[N_VEC]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[PREDPHI]], i32 15
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX]], 8
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX4]], 8
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK_NOT_NOT:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i64 [[N]], 1
; CHECK-NEXT: [[SMAX9:%.*]] = select i1 [[TMP7]], i64 [[N]], i64 1
; CHECK-NEXT: [[SMAX9:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[N_VEC11:%.*]] = and i64 [[SMAX9]], 9223372036854775800
; CHECK-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT17:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT16]], <8 x i32*> poison, <8 x i32> zeroinitializer
Expand All @@ -64,18 +61,18 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) {
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX12]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <8 x i32*> [[BROADCAST_SPLAT17]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP8]] to <8 x i32>*
; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT19]], <8 x i32>* [[TMP10]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX12]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <8 x i32*> [[BROADCAST_SPLAT17]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>*
; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT19]], <8 x i32>* [[TMP7]], align 4
; CHECK-NEXT: [[INDEX_NEXT13]] = add i64 [[INDEX12]], 8
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC11]]
; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP7:!llvm.loop !.*]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC11]]
; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], [[LOOP7:!llvm.loop !.*]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[WIDE_MASKED_GATHER20:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[BROADCAST_SPLAT17]], i32 4, <8 x i1> [[TMP9]], <8 x i32> undef)
; CHECK-NEXT: [[PREDPHI21:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[WIDE_MASKED_GATHER20]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1>
; CHECK-NEXT: [[WIDE_MASKED_GATHER20:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[BROADCAST_SPLAT17]], i32 4, <8 x i1> [[TMP6]], <8 x i32> undef)
; CHECK-NEXT: [[PREDPHI21:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[WIDE_MASKED_GATHER20]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1>
; CHECK-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[SMAX9]], [[N_VEC11]]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[PREDPHI21]], i32 7
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[PREDPHI21]], i32 7
; CHECK-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
Expand All @@ -95,10 +92,10 @@ define i32 @inv_load_conditional(i32* %a, i64 %n, i32* %b, i32 %k) {
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], [[LOOP9:!llvm.loop !.*]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: [[A_LCSSA_LCSSA8:%.*]] = phi i32 [ [[A_LCSSA]], [[LATCH]] ], [ [[TMP12]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[A_LCSSA_LCSSA8:%.*]] = phi i32 [ [[A_LCSSA]], [[LATCH]] ], [ [[TMP9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: [[A_LCSSA_LCSSA:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[A_LCSSA_LCSSA8]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: [[A_LCSSA_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[A_LCSSA_LCSSA8]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[A_LCSSA_LCSSA]]
;
entry:
Expand Down
220 changes: 106 additions & 114 deletions llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll

Large diffs are not rendered by default.

60 changes: 29 additions & 31 deletions llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,55 +14,53 @@ define void @foo(i8 addrspace(1)* align 8 dereferenceable_or_null(16), i8 addrsp
; CHECK-NEXT: [[DOT11:%.*]] = bitcast i8 addrspace(1)* [[DOT10]] to i8 addrspace(1)* addrspace(1)*
; CHECK-NEXT: [[DOT12:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP1:%.*]], i64 16
; CHECK-NEXT: [[DOT13:%.*]] = bitcast i8 addrspace(1)* [[DOT12]] to i8 addrspace(1)* addrspace(1)*
; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP2:%.*]], 1
; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], 16
; CHECK-NEXT: [[UMAX2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP2:%.*]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX2]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP2]], 1
; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[TMP4]], i64 [[TMP2]], i64 1
; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[UMAX1]], 3
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 16
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8 addrspace(1)* [[TMP0]], i64 [[TMP6]]
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, i8 addrspace(1)* [[TMP1]], i64 [[TMP6]]
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8 addrspace(1)* [[DOT10]], [[SCEVGEP2]]
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP2]], i64 1)
; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[UMAX]], 3
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], 16
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8 addrspace(1)* [[TMP0]], i64 [[TMP4]]
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, i8 addrspace(1)* [[TMP1]], i64 [[TMP4]]
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8 addrspace(1)* [[DOT10]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8 addrspace(1)* [[DOT12]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[UMAX]], -16
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[UMAX2]], -16
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[DOT13]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[DOT13]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP5]] to <4 x i8 addrspace(1)*> addrspace(1)*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP6]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP5]], i64 4
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP7]] to <4 x i8 addrspace(1)*> addrspace(1)*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP8]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP7]], i64 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP8]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP5]], i64 8
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP9]] to <4 x i8 addrspace(1)*> addrspace(1)*
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP10]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP7]], i64 8
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP10]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP5]], i64 12
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP11]] to <4 x i8 addrspace(1)*> addrspace(1)*
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP12]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP7]], i64 12
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP12]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[DOT11]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP13]] to <4 x i8 addrspace(1)*> addrspace(1)*
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8 addrspace(1)*>, <4 x i8 addrspace(1)*> addrspace(1)* [[TMP14]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[DOT11]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP14]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP13]], i64 4
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP15]] to <4 x i8 addrspace(1)*> addrspace(1)*
; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP16]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP15]], i64 4
; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD3]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP16]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP13]], i64 8
; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP17]] to <4 x i8 addrspace(1)*> addrspace(1)*
; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD3]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP18]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP15]], i64 8
; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD4]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP18]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP13]], i64 12
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP19]] to <4 x i8 addrspace(1)*> addrspace(1)*
; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD4]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP20]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* [[TMP15]], i64 12
; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8 addrspace(1)* addrspace(1)* [[TMP21]] to <4 x i8 addrspace(1)*> addrspace(1)*
; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD5]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP22]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: store <4 x i8 addrspace(1)*> [[WIDE_LOAD5]], <4 x i8 addrspace(1)*> addrspace(1)* [[TMP20]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]]
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
Expand Down
84 changes: 41 additions & 43 deletions llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
Original file line number Diff line number Diff line change
Expand Up @@ -39,63 +39,61 @@ define i32 @main() local_unnamed_addr #0 {
; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[CONV3]], -1
; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP6]], i32 [[TMP2]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[UMIN]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP7]], 8
; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP2]], i32 [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[UMIN1]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP6]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[CONV3]], -1
; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP8]] to i32
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i32 [[TMP2]], [[TMP9]]
; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[TMP10]], i32 [[TMP2]], i32 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[UMIN1]]
; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i8
; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP12]])
; CHECK-NEXT: [[TMP7:%.*]] = add i8 [[CONV3]], -1
; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP2]], i32 [[TMP8]])
; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[UMIN]]
; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[TMP9]] to i8
; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP10]])
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP13:%.*]] = add i8 [[TMP8]], [[MUL_RESULT]]
; CHECK-NEXT: [[TMP14:%.*]] = sub i8 [[TMP8]], [[MUL_RESULT]]
; CHECK-NEXT: [[TMP15:%.*]] = icmp ugt i8 [[TMP14]], [[TMP8]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp ult i8 [[TMP13]], [[TMP8]]
; CHECK-NEXT: [[TMP17:%.*]] = select i1 true, i1 [[TMP15]], i1 [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp ugt i32 [[TMP11]], 255
; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW]]
; CHECK-NEXT: [[TMP21:%.*]] = or i1 false, [[TMP20]]
; CHECK-NEXT: br i1 [[TMP21]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: [[TMP11:%.*]] = add i8 [[TMP7]], [[MUL_RESULT]]
; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[TMP7]], [[MUL_RESULT]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i8 [[TMP12]], [[TMP7]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp ult i8 [[TMP11]], [[TMP7]]
; CHECK-NEXT: [[TMP15:%.*]] = select i1 true, i1 [[TMP13]], i1 [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[TMP9]], 255
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW]]
; CHECK-NEXT: [[TMP19:%.*]] = or i1 false, [[TMP18]]
; CHECK-NEXT: br i1 [[TMP19]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP7]], 8
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP7]], [[N_MOD_VF]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP6]], 8
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP6]], [[N_MOD_VF]]
; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
; CHECK-NEXT: [[IND_END:%.*]] = sub i8 [[CONV3]], [[CAST_CRD]]
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP22]], [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[INDEX]] to i8
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP23]]
; CHECK-NEXT: [[TMP24:%.*]] = add i8 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP25:%.*]] = add i8 [[OFFSET_IDX]], -4
; CHECK-NEXT: [[TMP26]] = add <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP27]] = add <4 x i32> [[VEC_PHI2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP28:%.*]] = add i8 [[TMP24]], -1
; CHECK-NEXT: [[TMP29:%.*]] = add i8 [[TMP25]], -1
; CHECK-NEXT: [[TMP30:%.*]] = zext i8 [[TMP28]] to i32
; CHECK-NEXT: [[TMP31:%.*]] = zext i8 [[TMP29]] to i32
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP20]], [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[INDEX]] to i8
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP21]]
; CHECK-NEXT: [[TMP22:%.*]] = add i8 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP23:%.*]] = add i8 [[OFFSET_IDX]], -4
; CHECK-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP25]] = add <4 x i32> [[VEC_PHI2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP26:%.*]] = add i8 [[TMP22]], -1
; CHECK-NEXT: [[TMP27:%.*]] = add i8 [[TMP23]], -1
; CHECK-NEXT: [[TMP28:%.*]] = zext i8 [[TMP26]] to i32
; CHECK-NEXT: [[TMP29:%.*]] = zext i8 [[TMP27]] to i32
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP27]], [[TMP26]]
; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP7]], [[N_VEC]]
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP25]], [[TMP24]]
; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP6]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_BODY8:%.*]]
; CHECK: for.body8:
; CHECK-NEXT: [[INC5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ]
Expand All @@ -106,7 +104,7 @@ define i32 @main() local_unnamed_addr #0 {
; CHECK-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP2]], [[CONV5]]
; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], [[LOOP2:!llvm.loop !.*]]
; CHECK: for.cond4.for.inc9_crit_edge:
; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: store i32 [[INC_LCSSA]], i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16
; CHECK-NEXT: br label [[FOR_INC9]]
; CHECK: for.inc9:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -437,41 +437,39 @@ exit:
define i16 @multiple_exit(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096
; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 4
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 4, i32 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 4, i32 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 3
; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>*
; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP12]], align 2
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>*
; CHECK-NEXT: store <4 x i16> [[TMP13]], <4 x i16>* [[TMP14]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 3
; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>*
; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>*
; CHECK-NEXT: store <4 x i16> [[TMP11]], <4 x i16>* [[TMP12]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
Expand Down Expand Up @@ -524,41 +522,39 @@ if.end:
define i16 @multiple_exit2(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096
; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 4
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 4, i32 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 4, i32 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 3
; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>*
; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP12]], align 2
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>*
; CHECK-NEXT: store <4 x i16> [[TMP13]], <4 x i16>* [[TMP14]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 3
; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>*
; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>*
; CHECK-NEXT: store <4 x i16> [[TMP11]], <4 x i16>* [[TMP12]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
Expand Down
18 changes: 10 additions & 8 deletions llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
Original file line number Diff line number Diff line change
Expand Up @@ -649,13 +649,14 @@ for.end:
define i32 @sink_into_replication_region(i32 %y) {
; CHECK-LABEL: @sink_into_replication_region(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[Y:%.*]], 1
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 [[Y]], i32 1
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw i32 [[TMP1]], 3
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[TMP1]], -1
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[TMP1]], -1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
Expand Down Expand Up @@ -741,13 +742,14 @@ bb:
define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
; CHECK-LABEL: @sink_into_replication_region_multiple(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[Y:%.*]], 1
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 [[Y]], i32 1
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw i32 [[TMP1]], 3
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[TMP1]], -1
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[TMP1]], -1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
Expand Down
136 changes: 113 additions & 23 deletions llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,53 @@ define void @bug18724(i1 %cond) {
; UNROLL-NEXT: entry:
; UNROLL-NEXT: [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
; UNROLL-NEXT: call void @llvm.assume(i1 [[TMP0]])
; UNROLL-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 undef, i32 0)
; UNROLL-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], undef
; UNROLL-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; UNROLL-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2
; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2
; UNROLL-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; UNROLL-NEXT: [[IND_END:%.*]] = add i64 undef, [[N_VEC]]
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE4]] ]
; UNROLL-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[PREDPHI5:%.*]], [[PRED_STORE_CONTINUE4]] ]
; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i64 undef, [[INDEX]]
; UNROLL-NEXT: [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
; UNROLL-NEXT: [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 1
; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION]]
; UNROLL-NEXT: [[TMP5:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION1]]
; UNROLL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4
; UNROLL-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
; UNROLL-NEXT: br i1 undef, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE4]]
; UNROLL: pred.store.if:
; UNROLL-NEXT: store i32 2, i32* [[TMP4]], align 4
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE4]]
; UNROLL: pred.store.continue4:
; UNROLL-NEXT: [[TMP8:%.*]] = add i32 [[VEC_PHI]], 1
; UNROLL-NEXT: [[TMP9:%.*]] = add i32 [[VEC_PHI2]], 1
; UNROLL-NEXT: [[PREDPHI]] = select i1 undef, i32 [[VEC_PHI]], i32 [[TMP8]]
; UNROLL-NEXT: [[PREDPHI5]] = select i1 undef, i32 [[VEC_PHI2]], i32 [[TMP9]]
; UNROLL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; UNROLL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[BIN_RDX:%.*]] = add i32 [[PREDPHI5]], [[PREDPHI]]
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; UNROLL-NEXT: [[TMP11:%.*]] = xor i1 [[CMP_N]], true
; UNROLL-NEXT: call void @llvm.assume(i1 [[TMP11]])
; UNROLL-NEXT: br label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ undef, [[ENTRY:%.*]] ]
; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: br label [[FOR_BODY14:%.*]]
; UNROLL: for.body14:
; UNROLL-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ]
; UNROLL-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ undef, [[ENTRY]] ]
; UNROLL-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]]
; UNROLL-NEXT: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4
; UNROLL-NEXT: br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]]
Expand All @@ -221,8 +264,16 @@ define void @bug18724(i1 %cond) {
; UNROLL-NOSIMPLIFY: for.body9:
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[COND:%.*]], label [[FOR_INC26:%.*]], label [[FOR_BODY14_PREHEADER:%.*]]
; UNROLL-NOSIMPLIFY: for.body14.preheader:
; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NOSIMPLIFY-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 undef, i32 0)
; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = sub i32 [[SMAX]], undef
; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NOSIMPLIFY: vector.ph:
; UNROLL-NOSIMPLIFY-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
; UNROLL-NOSIMPLIFY-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; UNROLL-NOSIMPLIFY-NEXT: [[IND_END:%.*]] = add i64 undef, [[N_VEC]]
; UNROLL-NOSIMPLIFY-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NOSIMPLIFY: vector.body:
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
Expand All @@ -231,33 +282,33 @@ define void @bug18724(i1 %cond) {
; UNROLL-NOSIMPLIFY-NEXT: [[OFFSET_IDX:%.*]] = add i64 undef, [[INDEX]]
; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION1]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION1]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 4
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br i1 undef, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL-NOSIMPLIFY: pred.store.if:
; UNROLL-NOSIMPLIFY-NEXT: store i32 2, i32* [[TMP0]], align 4
; UNROLL-NOSIMPLIFY-NEXT: store i32 2, i32* [[TMP3]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL-NOSIMPLIFY: pred.store.continue:
; UNROLL-NOSIMPLIFY-NEXT: br i1 undef, label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
; UNROLL-NOSIMPLIFY: pred.store.if3:
; UNROLL-NOSIMPLIFY-NEXT: store i32 2, i32* [[TMP1]], align 4
; UNROLL-NOSIMPLIFY-NEXT: store i32 2, i32* [[TMP4]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE4]]
; UNROLL-NOSIMPLIFY: pred.store.continue4:
; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = add i32 [[VEC_PHI]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = add i32 [[VEC_PHI2]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI]] = select i1 undef, i32 [[VEC_PHI]], i32 [[TMP4]]
; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI5]] = select i1 undef, i32 [[VEC_PHI2]], i32 [[TMP5]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = add i32 [[VEC_PHI]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = add i32 [[VEC_PHI2]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI]] = select i1 undef, i32 [[VEC_PHI]], i32 [[TMP7]]
; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI5]] = select i1 undef, i32 [[VEC_PHI2]], i32 [[TMP8]]
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]]
; UNROLL-NOSIMPLIFY: middle.block:
; UNROLL-NOSIMPLIFY-NEXT: [[BIN_RDX:%.*]] = add i32 [[PREDPHI5]], [[PREDPHI]]
; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 1, 0
; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP_N]], label [[FOR_INC26_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NOSIMPLIFY: scalar.ph:
; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ undef, [[FOR_BODY14_PREHEADER]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ undef, [[FOR_BODY14_PREHEADER]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ undef, [[FOR_BODY14_PREHEADER]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY14:%.*]]
; UNROLL-NOSIMPLIFY: for.body14:
Expand Down Expand Up @@ -287,10 +338,49 @@ define void @bug18724(i1 %cond) {
; VEC-NEXT: entry:
; VEC-NEXT: [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
; VEC-NEXT: call void @llvm.assume(i1 [[TMP0]])
; VEC-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 undef, i32 0)
; VEC-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], undef
; VEC-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; VEC-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2
; VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VEC: vector.ph:
; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2
; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; VEC-NEXT: [[IND_END:%.*]] = add i64 undef, [[N_VEC]]
; VEC-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC: vector.body:
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; VEC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 undef, i32 0>, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE2]] ]
; VEC-NEXT: [[OFFSET_IDX:%.*]] = add i64 undef, [[INDEX]]
; VEC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
; VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[TMP4]]
; VEC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
; VEC-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>*
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP7]], align 4
; VEC-NEXT: br i1 undef, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.if:
; VEC-NEXT: store i32 2, i32* [[TMP5]], align 4
; VEC-NEXT: br label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.continue2:
; VEC-NEXT: [[TMP8:%.*]] = add <2 x i32> [[VEC_PHI]], <i32 1, i32 1>
; VEC-NEXT: [[PREDPHI]] = select <2 x i1> undef, <2 x i32> [[VEC_PHI]], <2 x i32> [[TMP8]]
; VEC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; VEC-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; VEC: middle.block:
; VEC-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PREDPHI]])
; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; VEC-NEXT: [[TMP11:%.*]] = xor i1 [[CMP_N]], true
; VEC-NEXT: call void @llvm.assume(i1 [[TMP11]])
; VEC-NEXT: br label [[SCALAR_PH]]
; VEC: scalar.ph:
; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ undef, [[ENTRY:%.*]] ]
; VEC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
; VEC-NEXT: br label [[FOR_BODY14:%.*]]
; VEC: for.body14:
; VEC-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ]
; VEC-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ undef, [[ENTRY]] ]
; VEC-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; VEC-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; VEC-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]]
; VEC-NEXT: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4
; VEC-NEXT: br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]]
Expand Down Expand Up @@ -367,7 +457,7 @@ define void @minimal_bit_widths(i1 %c) {
; UNROLL: pred.store.continue6:
; UNROLL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
; UNROLL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]]
; UNROLL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef
; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
Expand All @@ -386,7 +476,7 @@ define void @minimal_bit_widths(i1 %c) {
; UNROLL-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
; UNROLL-NEXT: [[TMP7]] = add i64 [[TMP1]], -1
; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
; UNROLL-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; UNROLL-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]]
; UNROLL: for.end:
; UNROLL-NEXT: ret void
;
Expand Down Expand Up @@ -485,7 +575,7 @@ define void @minimal_bit_widths(i1 %c) {
; VEC: pred.store.continue3:
; VEC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; VEC-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
; VEC-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; VEC-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]]
; VEC: middle.block:
; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef
; VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
Expand All @@ -504,7 +594,7 @@ define void @minimal_bit_widths(i1 %c) {
; VEC-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
; VEC-NEXT: [[TMP7]] = add i64 [[TMP1]], -1
; VEC-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
; VEC-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]]
; VEC-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP6:!llvm.loop !.*]]
; VEC: for.end:
; VEC-NEXT: ret void
;
Expand Down
415 changes: 204 additions & 211 deletions llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -166,15 +166,13 @@ for.end: ; preds = %for.body
; CHECK-LABEL: inv_val_store_to_inv_address_conditional_diff_values_ic
; CHECK-NEXT: entry:
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1
; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8*
; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1
; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1
; CHECK-NEXT: [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX2]]
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A4]], i64 1
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B1]]
Expand Down Expand Up @@ -272,15 +270,13 @@ for.end: ; preds = %for.body
; CHECK-NEXT: entry:
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NTRUNC]], [[K:%.*]]
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1
; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8*
; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1
; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1
; CHECK-NEXT: [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX2]]
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A4]], i64 1
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B1]]
Expand Down Expand Up @@ -367,16 +363,14 @@ for.end: ; preds = %for.body
define i32 @variant_val_store_to_inv_address(i32* %a, i64 %n, i32* %b, i32 %k) {
; CHECK-LABEL: @variant_val_store_to_inv_address(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N:%.*]], 1
; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[B2:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT: [[A1:%.*]] = bitcast i32* [[A:%.*]] to i8*
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A1]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1
; CHECK-NEXT: [[SMAX3:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1
; CHECK-NEXT: [[SMAX3:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX3]]
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP]], [[A]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B2]]
Expand Down
219 changes: 105 additions & 114 deletions llvm/test/Transforms/LoopVectorize/loop-form.ll

Large diffs are not rendered by default.