24 changes: 12 additions & 12 deletions llvm/test/Transforms/LoopVectorize/reduction.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ define i32 @reduction_sum(i32 %n, ptr %A, ptr %B) {
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ]
Expand Down Expand Up @@ -110,8 +110,8 @@ define i32 @reduction_prod(i32 %n, ptr %A, ptr %B) {
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ]
Expand Down Expand Up @@ -191,8 +191,8 @@ define i32 @reduction_mix(i32 %n, ptr %A, ptr %B) {
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ]
Expand Down Expand Up @@ -272,8 +272,8 @@ define i32 @reduction_mul(i32 %n, ptr %A, ptr %B) {
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ]
Expand Down Expand Up @@ -351,8 +351,8 @@ define i32 @start_at_non_zero(ptr %in, ptr %coeff, ptr %out, i32 %n) {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
Expand Down Expand Up @@ -427,8 +427,8 @@ define i32 @reduction_and(i32 %n, ptr %A, ptr %B) {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
Expand Down Expand Up @@ -503,8 +503,8 @@ define i32 @reduction_or(i32 %n, ptr %A, ptr %B) {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
Expand Down Expand Up @@ -579,8 +579,8 @@ define i32 @reduction_xor(i32 %n, ptr %A, ptr %B) {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
Expand Down Expand Up @@ -699,8 +699,8 @@ define i32 @reduction_sub_lhs(i32 %n, ptr %A) {
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
Expand Down Expand Up @@ -1063,8 +1063,8 @@ define i32 @reduction_sum_multiuse(i32 %n, ptr %A, ptr %B) {
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ]
Expand Down Expand Up @@ -1221,8 +1221,8 @@ define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, ptr %A) {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]])
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
Expand Down Expand Up @@ -1299,8 +1299,8 @@ define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, ptr %A) {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]])
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP31]]
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], {{%.*}}
; CHECK: middle.block:
; CHECK-NEXT: %cmp.n = icmp eq i32 256, %n.vec
; CHECK-NEXT: [[TMP37:%.*]] = trunc <vscale x 8 x i32> [[TMP34]] to <vscale x 8 x i8>
; CHECK-NEXT: [[TMP38:%.*]] = trunc <vscale x 8 x i32> [[TMP36]] to <vscale x 8 x i8>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <vscale x 8 x i8> [[TMP38]], [[TMP37]]
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1
; CHECK-VF2IC1-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP17]], <2 x i1> [[VEC_PHI]]
; CHECK-VF2IC1: br i1 {{%.*}}, label %middle.block, label %vector.body
; CHECK-VF2IC1: middle.block:
; CHECK-VF2IC1-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
; CHECK-VF2IC1-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[PREDPHI]])
; CHECK-VF2IC1-NEXT: [[FR_TMP20:%.*]] = freeze i1 [[TMP20]]
; CHECK-VF2IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP20]], i32 1, i32 0
Expand Down Expand Up @@ -85,6 +86,7 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1
; CHECK-VF1IC2-NEXT: [[PREDPHI5]] = select i1 [[TMP5]], i1 [[TMP15]], i1 [[VEC_PHI2]]
; CHECK-VF1IC2: br i1 {{%.*}}, label %middle.block, label %vector.body
; CHECK-VF1IC2: middle.block:
; CHECK-VF1IC2-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
; CHECK-VF1IC2-NEXT: [[OR:%.*]] = or i1 [[PREDPHI5]], [[PREDPHI]]
; CHECK-VF1IC2-NEXT: [[FR_OR:%.*]] = freeze i1 [[OR]]
; CHECK-VF1IC2-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_OR]], i32 1, i32 0
Expand Down
8 changes: 8 additions & 0 deletions llvm/test/Transforms/LoopVectorize/select-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) {
; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]]
; CHECK-VF4IC1: middle.block:
; CHECK-VF4IC1-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]]
; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3
Expand All @@ -33,6 +34,7 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) {
; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = or <4 x i1> [[VEC_PHI3]], [[NOT3]]
; CHECK-VF4IC4-NEXT: [[VEC_SEL4:%.*]] = or <4 x i1> [[VEC_PHI4]], [[NOT4]]
; CHECK-VF4IC4: middle.block:
; CHECK-VF4IC4-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
; CHECK-VF4IC4-NEXT: [[VEC_SEL5:%.*]] = or <4 x i1> [[VEC_SEL2]], [[VEC_SEL1]]
; CHECK-VF4IC4-NEXT: [[VEC_SEL6:%.*]] = or <4 x i1> [[VEC_SEL3]], [[VEC_SEL5]]
; CHECK-VF4IC4-NEXT: [[VEC_SEL7:%.*]] = or <4 x i1> [[VEC_SEL4]], [[VEC_SEL6]]
Expand Down Expand Up @@ -63,6 +65,7 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) {
; CHECK-VF1IC4-NEXT: [[VEC_SEL3:%.*]] = or i1 [[VEC_PHI3]], [[NOT3]]
; CHECK-VF1IC4-NEXT: [[VEC_SEL4:%.*]] = or i1 [[VEC_PHI4]], [[NOT4]]
; CHECK-VF1IC4: middle.block:
; CHECK-VF1IC4-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
; CHECK-VF1IC4-NEXT: [[VEC_SEL5:%.*]] = or i1 [[VEC_SEL2]], [[VEC_SEL1]]
; CHECK-VF1IC4-NEXT: [[VEC_SEL6:%.*]] = or i1 [[VEC_SEL3]], [[VEC_SEL5]]
; CHECK-VF1IC4-NEXT: [[OR_RDX:%.*]] = or i1 [[VEC_SEL4]], [[VEC_SEL6]]
Expand Down Expand Up @@ -96,6 +99,7 @@ define i32 @select_const_i32_from_icmp2(ptr nocapture readonly %v, i64 %n) {
; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], <i32 3, i32 3, i32 3, i32 3>
; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[VEC_ICMP]]
; CHECK-VF4IC1: middle.block:
; CHECK-VF4IC1-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]]
; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3
Expand Down Expand Up @@ -131,6 +135,7 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64
; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]]
; CHECK-VF4IC1: middle.block:
; CHECK-VF4IC1-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]]
; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 %b, i32 %a
Expand Down Expand Up @@ -162,6 +167,7 @@ define i32 @select_const_i32_from_fcmp_fast(ptr nocapture readonly %v, i64 %n) {
; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]]
; CHECK-VF4IC1: middle.block:
; CHECK-VF4IC1-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]]
; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 1, i32 2
Expand Down Expand Up @@ -193,6 +199,7 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) {
; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]]
; CHECK-VF4IC1: middle.block:
; CHECK-VF4IC1-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]]
; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 1, i32 2
Expand Down Expand Up @@ -227,6 +234,7 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) {
; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]]
; CHECK-VF4IC1: middle.block:
; CHECK-VF4IC1-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]])
; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]]
; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 %b, i32 %a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ define i32 @read_only_loop_with_runtime_check(ptr noundef %array, i32 noundef %c
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP5]], [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER13]]
; CHECK: for.body.preheader13:
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
Expand Down Expand Up @@ -150,9 +150,9 @@ define dso_local noundef i32 @sum_prefix_with_sum(ptr %s.coerce0, i64 %s.coerce1
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[SPAN_CHECKED_ACCESS_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]]
; CHECK-NEXT: [[ADD:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER11]]
; CHECK: for.body.preheader11:
; CHECK-NEXT: [[I_07_PH:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[SPAN_CHECKED_ACCESS_EXIT]] ]
Expand Down