From 1fa2b49071a42ca871963904b1f65c0278571278 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 24 Sep 2025 18:44:53 +0100 Subject: [PATCH 1/3] [VPlan] Run CSE closer to VPlan::execute. Additional CSE opportunities are exposed after converting to concrete recipes/dissolving regions and materializing various expressions. Run CSE later, to capitalize on some of the late opportunities. --- .../Transforms/Vectorize/LoopVectorize.cpp | 2 +- .../AArch64/masked-call-scalarize.ll | 17 +- .../LoopVectorize/AArch64/masked-call.ll | 4 +- ...row-interleave-to-widen-memory-scalable.ll | 6 +- .../LoopVectorize/ARM/mve-reductions.ll | 3 +- .../RISCV/blocks-with-dead-instructions.ll | 21 +-- .../LoopVectorize/RISCV/dead-ops-cost.ll | 11 +- .../RISCV/evl-compatible-loops.ll | 8 +- .../RISCV/gather-scatter-cost.ll | 9 +- .../LoopVectorize/RISCV/induction-costs.ll | 3 +- .../LoopVectorize/RISCV/mask-index-type.ll | 5 +- .../RISCV/masked_gather_scatter.ll | 6 +- ...ruction-or-drop-poison-generating-flags.ll | 5 +- .../LoopVectorize/RISCV/reg-usage-prune-vf.ll | 34 ++-- .../LoopVectorize/RISCV/strided-accesses.ll | 40 ++-- .../RISCV/tail-folding-cast-intrinsics.ll | 5 +- .../tail-folding-fixed-order-recurrence.ll | 5 +- .../RISCV/tail-folding-interleave.ll | 9 +- .../LoopVectorize/RISCV/uniform-load-store.ll | 30 ++- .../X86/replicate-uniform-call.ll | 5 +- .../LoopVectorize/reduction-inloop.ll | 177 +++++++++--------- 21 files changed, 169 insertions(+), 236 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 4e8be28df7934..d5f8c82a2b78a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7228,7 +7228,6 @@ DenseMap LoopVectorizationPlanner::executePlan( VPlanTransforms::narrowInterleaveGroups( BestVPlan, BestVF, TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)); - VPlanTransforms::cse(BestVPlan); VPlanTransforms::removeDeadRecipes(BestVPlan); VPlanTransforms::convertToConcreteRecipes(BestVPlan); @@ -7242,6 +7241,7 @@ DenseMap LoopVectorizationPlanner::executePlan( BestVPlan, VectorPH, CM.foldTailByMasking(), CM.requiresScalarEpilogue(BestVF.isVector())); VPlanTransforms::materializeVFAndVFxUF(BestVPlan, VectorPH, BestVF); + VPlanTransforms::cse(BestVPlan); VPlanTransforms::simplifyRecipes(BestVPlan); // 0. Generate SCEV-dependent code in the entry, including TripCount, before diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll index b157a2818e676..157b78704234a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll @@ -109,36 +109,35 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0 ; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP9]], i32 1 ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fcmp ogt <2 x double> [[TMP12]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) ; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) ; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; TFA_INTERLEAVE: pred.store.if: -; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0 +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0 ; TFA_INTERLEAVE-NEXT: store double [[TMP20]], ptr [[P:%.*]], align 8 ; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE]] ; TFA_INTERLEAVE: pred.store.continue: ; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] -; TFA_INTERLEAVE: pred.store.if4: -; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1 +; TFA_INTERLEAVE: pred.store.if3: +; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1 ; TFA_INTERLEAVE-NEXT: store double [[TMP22]], ptr [[P]], align 8 ; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE5]] -; TFA_INTERLEAVE: pred.store.continue5: +; TFA_INTERLEAVE: pred.store.continue4: ; TFA_INTERLEAVE-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 0 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] -; TFA_INTERLEAVE: pred.store.if6: +; TFA_INTERLEAVE: pred.store.if5: ; TFA_INTERLEAVE-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0 ; TFA_INTERLEAVE-NEXT: store double [[TMP32]], ptr [[P]], align 8 ; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE7]] -; TFA_INTERLEAVE: pred.store.continue7: +; TFA_INTERLEAVE: pred.store.continue6: ; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 1 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] -; TFA_INTERLEAVE: pred.store.if8: +; TFA_INTERLEAVE: pred.store.if7: ; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1 ; TFA_INTERLEAVE-NEXT: store double [[TMP34]], ptr [[P]], align 8 ; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE9]] -; TFA_INTERLEAVE: pred.store.continue9: +; TFA_INTERLEAVE: pred.store.continue8: ; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 2 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP3]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 4a1d56572c013..d8a81f9316e4b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -960,13 +960,11 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8 ; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]] ; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = fcmp ogt double [[TMP6]], 0.000000e+00 -; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP8]], double 0.000000e+00, double 1.000000e+00 ; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP8]], double 0.000000e+00, double 1.000000e+00 -; TFA_INTERLEAVE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], double [[PREDPHI3]], double [[PREDPHI]] ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = or i1 [[ACTIVE_LANE_MASK]], [[ACTIVE_LANE_MASK2]] ; TFA_INTERLEAVE-NEXT: br i1 [[TMP14]], label %[[BB8:.*]], label %[[TMP9]] ; TFA_INTERLEAVE: [[BB8]]: -; TFA_INTERLEAVE-NEXT: store double [[SPEC_SELECT]], ptr [[P]], align 8 +; TFA_INTERLEAVE-NEXT: store double [[PREDPHI3]], ptr [[P]], align 8 ; TFA_INTERLEAVE-NEXT: br label %[[TMP9]] ; TFA_INTERLEAVE: [[TMP9]]: ; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll index 46ba7f645a03e..6c36dfb81311b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll @@ -16,7 +16,6 @@ define void @load_store_interleave_group(ptr noalias %data) { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -24,7 +23,7 @@ define void @load_store_interleave_group(ptr noalias %data) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP1]], align 8 ; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP1]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -66,7 +65,6 @@ define void @test_2xi64_unary_op_load_interleave_group(ptr noalias %data, ptr no ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1111, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1111, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -75,7 +73,7 @@ define void @test_2xi64_unary_op_load_interleave_group(ptr noalias %data, ptr no ; CHECK-NEXT: [[TMP7:%.*]] = load , ptr [[TMP1]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = fneg [[TMP7]] ; CHECK-NEXT: store [[TMP9]], ptr [[TMP1]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll index 658b9a4569191..1540baab53719 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll @@ -1679,8 +1679,7 @@ define i64 @test_std_q31(ptr %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP2]]) ; CHECK-NEXT: [[TMP4]] = add i64 [[VEC_PHI]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <4 x i64> [[TMP5]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <4 x i64> [[TMP2]], [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]]) ; CHECK-NEXT: [[TMP8]] = add i64 [[VEC_PHI1]], [[TMP7]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index fde18785ce2c4..612e7c083bda1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -27,8 +27,7 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP20]], splat (i1 true), i32 [[TMP12]]) -; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -101,8 +100,7 @@ define void @block_with_dead_inst_2(ptr %src) #0 { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP10]], splat (i1 true), i32 [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] @@ -175,8 +173,7 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP10]], splat (i1 true), i32 [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -259,8 +256,7 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP20]], splat (i1 true), i32 [[TMP12]]) -; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -345,8 +341,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP10]], splat (i1 true), i32 [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -449,8 +444,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[TMP23:%.*]] = select [[TMP17]], [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = or [[TMP22]], [[TMP23]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP20]], [[TMP24]], i32 [[TMP27]]) -; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP27]] to i64 -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] @@ -708,8 +702,7 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( zeroinitializer, align 4 [[TMP21]], splat (i1 true), i32 [[TMP18]]), !alias.scope [[META10:![0-9]+]], !noalias [[META13:![0-9]+]] -; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP18]] to i64 -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index 43fef428372dd..96c3a0d1a2f01 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -36,8 +36,7 @@ define void @dead_load(ptr %p, i16 %start) { ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector [[DOTSPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[P]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP21]], splat (i1 true), i32 [[TMP16]]) -; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP16]] to i64 -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT2]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -321,8 +320,7 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) { ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP19]], align 4 [[TMP16]], splat (i1 true), i32 [[TMP8]]) -; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP8]] to i64 -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] @@ -403,9 +401,8 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: [[TMP18:%.*]] = zext [[TMP17]] to ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP18]], align 4 [[TMP19]], splat (i1 true), i32 [[TMP10]]) -; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP10]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll index 663ead8c13cbd..0a605563e45a9 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll @@ -24,9 +24,8 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[EVL_BASED_IV]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VEC_IND]], ptr align 8 [[TMP14]], splat (i1 true), i32 [[TMP11]]) -; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP12]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -80,8 +79,7 @@ define void @test_wide_ptr_induction(ptr noalias %a, ptr noalias %b, i64 %N) { ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP7]] to i64 -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 8, [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 8, [[TMP9]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll index ea2ccb07b388b..5df4f703c1b1b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll @@ -111,8 +111,7 @@ define void @predicated_strided_store(ptr %start) { ; RVA23-NEXT: [[TMP4:%.*]] = mul [[VEC_IND]], splat (i64 7) ; RVA23-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[START:%.*]], [[TMP4]] ; RVA23-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0( zeroinitializer, align 1 [[TMP5]], splat (i1 true), i32 [[TMP2]]) -; RVA23-NEXT: [[TMP6:%.*]] = zext i32 [[TMP2]] to i64 -; RVA23-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP6]] +; RVA23-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]] ; RVA23-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; RVA23-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; RVA23-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -149,8 +148,7 @@ define void @predicated_strided_store(ptr %start) { ; RVA23ZVL1024B-NEXT: [[TMP4:%.*]] = mul [[VEC_IND]], splat (i64 7) ; RVA23ZVL1024B-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[START:%.*]], [[TMP4]] ; RVA23ZVL1024B-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0( zeroinitializer, align 1 [[TMP5]], splat (i1 true), i32 [[TMP2]]) -; RVA23ZVL1024B-NEXT: [[TMP6:%.*]] = zext i32 [[TMP2]] to i64 -; RVA23ZVL1024B-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP6]] +; RVA23ZVL1024B-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]] ; RVA23ZVL1024B-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; RVA23ZVL1024B-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; RVA23ZVL1024B-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -213,8 +211,7 @@ define void @store_to_addr_generated_from_invariant_addr(ptr noalias %p0, ptr no ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0( zeroinitializer, align 4 [[TMP7]], splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i32.nxv2p0( zeroinitializer, align 4 [[TMP7]], splat (i1 true), i32 [[TMP3]]) ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i8.nxv2p0( zeroinitializer, align 1 [[TMP7]], splat (i1 true), i32 [[TMP3]]) -; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP3]] to i64 -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP4]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll index d3db55e251870..83619250b4ad4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll @@ -69,8 +69,7 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 { ; CHECK-NEXT: [[DOTSPLAT25:%.*]] = shufflevector [[DOTSPLATINSERT24]], poison, zeroinitializer ; CHECK-NEXT: [[TMP59:%.*]] = getelementptr i16, ptr [[A]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP59]], splat (i1 true), i32 [[TMP27]]), !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] -; CHECK-NEXT: [[TMP47:%.*]] = zext i32 [[TMP27]] to i64 -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP47]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP28]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT25]] ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll index 86b3a7e32c852..ae6c90c5ce188 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -34,9 +34,8 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: [[TMP17:%.*]] = add [[PREDPHI]], [[BROADCAST_SPLAT]] ; VLENUNK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; VLENUNK-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP17]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP7]]) -; VLENUNK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP7]] to i64 -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP19]], [[INDEX]] -; VLENUNK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP15]], [[INDEX]] +; VLENUNK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; VLENUNK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; VLENUNK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; VLENUNK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index 7d32302abfe24..89819f2be4967 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -52,8 +52,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[TMP18:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP17]] ; RV32-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] ; RV32-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0( [[TMP18]], align 8 [[TMP19]], [[TMP14]], i32 [[TMP10]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] -; RV32-NEXT: [[TMP20:%.*]] = zext i32 [[TMP10]] to i64 -; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] +; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; RV32-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; RV32-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -121,8 +120,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[TMP18:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP17]] ; RV64-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] ; RV64-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0( [[TMP18]], align 8 [[TMP19]], [[TMP14]], i32 [[TMP10]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] -; RV64-NEXT: [[TMP20:%.*]] = zext i32 [[TMP10]] to i64 -; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] +; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; RV64-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; RV64-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; RV64-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll index d0068e134fd32..782c2f6c24fa4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll @@ -43,9 +43,8 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP23]], i64 poison, i64 [[INDEX]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[PREDPHI]] ; CHECK-NEXT: call void @llvm.vp.store.nxv8i16.p0( zeroinitializer, ptr align 2 [[TMP24]], [[TMP22]], i32 [[TMP25]]) -; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP26]], [[INDEX]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP26]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll index 5876a6bf32848..93c0a7455165b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll @@ -12,11 +12,9 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) { ; CHECK-NEXT: [[TMP0:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul [[TMP0]], splat (i64 2) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul [[TMP2]], splat (i64 3) +; CHECK-NEXT: [[TMP3:%.*]] = mul [[TMP0]], splat (i64 3) ; CHECK-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP5:%.*]] = mul [[TMP4]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = mul [[TMP0]], splat (i64 4) ; CHECK-NEXT: [[INDUCTION2:%.*]] = add zeroinitializer, [[TMP5]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: @@ -30,12 +28,10 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) { ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 4, [[TMP7]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP7]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP6]] to i64 -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP7]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector [[BROADCAST_SPLATINSERT7]], poison, zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = sub [[VEC_IND]], splat (i64 1) @@ -53,9 +49,8 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) { ; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP6]], 3 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave3.nxv12i8( [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER9]], [[WIDE_MASKED_GATHER10]]) ; CHECK-NEXT: call void @llvm.vp.store.nxv12i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], splat (i1 true), i32 [[INTERLEAVE_EVL]]) -; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT8]] ; CHECK-NEXT: [[VEC_IND_NEXT11]] = add [[VEC_IND3]], [[BROADCAST_SPLAT6]] ; CHECK-NEXT: [[VEC_IND_NEXT12]] = add [[VEC_IND4]], [[BROADCAST_SPLAT]] @@ -104,11 +99,9 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) { ; NO-REG-PRESSURE-CHECK-NEXT: [[TMP0:%.*]] = call @llvm.stepvector.nxv8i64() ; NO-REG-PRESSURE-CHECK-NEXT: [[TMP1:%.*]] = mul [[TMP0]], splat (i64 2) ; NO-REG-PRESSURE-CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP1]] -; NO-REG-PRESSURE-CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv8i64() -; NO-REG-PRESSURE-CHECK-NEXT: [[TMP3:%.*]] = mul [[TMP2]], splat (i64 3) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP3:%.*]] = mul [[TMP0]], splat (i64 3) ; NO-REG-PRESSURE-CHECK-NEXT: [[INDUCTION1:%.*]] = add zeroinitializer, [[TMP3]] -; NO-REG-PRESSURE-CHECK-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv8i64() -; NO-REG-PRESSURE-CHECK-NEXT: [[TMP5:%.*]] = mul [[TMP4]], splat (i64 4) +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP5:%.*]] = mul [[TMP0]], splat (i64 4) ; NO-REG-PRESSURE-CHECK-NEXT: [[INDUCTION2:%.*]] = add zeroinitializer, [[TMP5]] ; NO-REG-PRESSURE-CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; NO-REG-PRESSURE-CHECK: [[VECTOR_BODY]]: @@ -122,12 +115,10 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) { ; NO-REG-PRESSURE-CHECK-NEXT: [[TMP8:%.*]] = mul i64 4, [[TMP7]] ; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; NO-REG-PRESSURE-CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 -; NO-REG-PRESSURE-CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP9]] +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP7]] ; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 ; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer -; NO-REG-PRESSURE-CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP6]] to i64 -; NO-REG-PRESSURE-CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP11]] +; NO-REG-PRESSURE-CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP7]] ; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector [[BROADCAST_SPLATINSERT7]], poison, zeroinitializer ; NO-REG-PRESSURE-CHECK-NEXT: [[TMP13:%.*]] = sub [[VEC_IND]], splat (i64 1) @@ -145,9 +136,8 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) { ; NO-REG-PRESSURE-CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP6]], 3 ; NO-REG-PRESSURE-CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave3.nxv24i8( [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER9]], [[WIDE_MASKED_GATHER10]]) ; NO-REG-PRESSURE-CHECK-NEXT: call void @llvm.vp.store.nxv24i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], splat (i1 true), i32 [[INTERLEAVE_EVL]]) -; NO-REG-PRESSURE-CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64 -; NO-REG-PRESSURE-CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]] -; NO-REG-PRESSURE-CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] +; NO-REG-PRESSURE-CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]] +; NO-REG-PRESSURE-CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT8]] ; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT11]] = add [[VEC_IND3]], [[BROADCAST_SPLAT6]] ; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT12]] = add [[VEC_IND4]], [[BROADCAST_SPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 4c84913eea23d..ca1c710e614f3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -26,8 +26,7 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[TMP15]], splat (i1 true), i32 [[TMP11]]) ; CHECK-NEXT: [[TMP16:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP16]], align 4 [[TMP15]], splat (i1 true), i32 [[TMP11]]) -; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -143,8 +142,7 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[TMP12]], splat (i1 true), i32 [[TMP7]]) ; CHECK-NEXT: [[TMP13:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP13]], align 4 [[TMP12]], splat (i1 true), i32 [[TMP7]]) -; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP7]] to i64 -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -261,8 +259,7 @@ define void @single_constant_stride_ptr_iv(ptr %p) { ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP20]], align 4 [[VECTOR_GEP]], splat (i1 true), i32 [[TMP11]]) ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 8, [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 8, [[TMP9]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -834,8 +831,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[TMP20:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) ; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[P2]], [[TMP18]] ; STRIDED-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP20]], align 4 [[TMP21]], splat (i1 true), i32 [[TMP43]]), !alias.scope [[META9:![0-9]+]], !noalias [[META6]] -; STRIDED-NEXT: [[TMP46:%.*]] = zext i32 [[TMP43]] to i64 -; STRIDED-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP46]] +; STRIDED-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP44]] ; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; STRIDED-NEXT: [[TMP41:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; STRIDED-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] @@ -1184,21 +1180,16 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[DOTSPLAT10:%.*]] = shufflevector [[DOTSPLATINSERT9]], poison, zeroinitializer ; STRIDED-NEXT: [[TMP18:%.*]] = mul [[TMP19]], [[DOTSPLAT10]] ; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], [[TMP18]] -; STRIDED-NEXT: [[TMP27:%.*]] = call @llvm.stepvector.nxv4i64() -; STRIDED-NEXT: [[TMP21:%.*]] = mul [[TMP27]], [[DOTSPLAT10]] -; STRIDED-NEXT: [[VECTOR_GEP7:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP21]] +; STRIDED-NEXT: [[VECTOR_GEP7:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP18]] ; STRIDED-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[VECTOR_GEP7]], splat (i1 true), i32 [[TMP14]]), !alias.scope [[META13:![0-9]+]] ; STRIDED-NEXT: [[TMP30:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) ; STRIDED-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP30]], align 4 [[VECTOR_GEP]], splat (i1 true), i32 [[TMP14]]), !alias.scope [[META16:![0-9]+]], !noalias [[META13]] ; STRIDED-NEXT: [[TMP16:%.*]] = zext i32 [[TMP14]] to i64 ; STRIDED-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] -; STRIDED-NEXT: [[TMP20:%.*]] = zext i32 [[TMP14]] to i64 -; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP20]] +; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP16]] ; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP25]] -; STRIDED-NEXT: [[TMP22:%.*]] = zext i32 [[TMP14]] to i64 -; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 [[STRIDE]], [[TMP22]] -; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP17]] +; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP25]] ; STRIDED-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; STRIDED-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; STRIDED: middle.block: @@ -1270,9 +1261,7 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-UF2-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector [[BROADCAST_SPLATINSERT10]], poison, zeroinitializer ; STRIDED-UF2-NEXT: [[TMP16:%.*]] = mul [[TMP15]], [[BROADCAST_SPLAT11]] ; STRIDED-UF2-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI9]], [[TMP16]] -; STRIDED-UF2-NEXT: [[TMP17:%.*]] = call @llvm.stepvector.nxv4i64() -; STRIDED-UF2-NEXT: [[TMP18:%.*]] = mul [[TMP17]], [[BROADCAST_SPLAT11]] -; STRIDED-UF2-NEXT: [[VECTOR_GEP12:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP18]] +; STRIDED-UF2-NEXT: [[VECTOR_GEP12:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP16]] ; STRIDED-UF2-NEXT: [[STEP_ADD:%.*]] = getelementptr i8, [[VECTOR_GEP12]], [[TMP14]] ; STRIDED-UF2-NEXT: [[STEP_ADD13:%.*]] = getelementptr i8, [[VECTOR_GEP]], [[TMP14]] ; STRIDED-UF2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[VECTOR_GEP12]], i32 4, splat (i1 true), poison), !alias.scope [[META15:![0-9]+]] @@ -1284,8 +1273,7 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; STRIDED-UF2-NEXT: [[TMP21:%.*]] = mul i64 [[STRIDE]], [[TMP9]] ; STRIDED-UF2-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP21]] -; STRIDED-UF2-NEXT: [[TMP22:%.*]] = mul i64 [[STRIDE]], [[TMP9]] -; STRIDED-UF2-NEXT: [[PTR_IND15]] = getelementptr i8, ptr [[POINTER_PHI9]], i64 [[TMP22]] +; STRIDED-UF2-NEXT: [[PTR_IND15]] = getelementptr i8, ptr [[POINTER_PHI9]], i64 [[TMP21]] ; STRIDED-UF2-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-UF2-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; STRIDED-UF2: middle.block: @@ -1363,9 +1351,8 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) { ; NOSTRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv2i64.nxv2p0( align 8 [[TMP4]], splat (i1 true), i32 [[TMP2]]) ; NOSTRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i64, ptr [[OUT:%.*]], i64 [[EVL_BASED_IV]] ; NOSTRIDED-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[WIDE_MASKED_GATHER]], ptr align 8 [[TMP5]], splat (i1 true), i32 [[TMP2]]) -; NOSTRIDED-NEXT: [[TMP6:%.*]] = zext i32 [[TMP2]] to i64 -; NOSTRIDED-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP6]], [[EVL_BASED_IV]] -; NOSTRIDED-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP6]] +; NOSTRIDED-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP3]], [[EVL_BASED_IV]] +; NOSTRIDED-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]] ; NOSTRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; NOSTRIDED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; NOSTRIDED-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -1459,9 +1446,8 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) { ; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv2i64.nxv2p0( align 8 [[TMP4]], splat (i1 true), i32 [[TMP2]]) ; STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i64, ptr [[OUT:%.*]], i64 [[EVL_BASED_IV]] ; STRIDED-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[WIDE_MASKED_GATHER]], ptr align 8 [[TMP5]], splat (i1 true), i32 [[TMP2]]) -; STRIDED-NEXT: [[TMP6:%.*]] = zext i32 [[TMP2]] to i64 -; STRIDED-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP6]], [[EVL_BASED_IV]] -; STRIDED-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP6]] +; STRIDED-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP3]], [[EVL_BASED_IV]] +; STRIDED-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP3]] ; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; STRIDED-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll index c10bfc32e6e29..6652fefb35d60 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll @@ -1199,9 +1199,8 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP15:%.*]] = ptrtoint [[TMP14]] to ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP15]], ptr align 8 [[TMP16]], splat (i1 true), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP12]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll index 55facaa96631e..b153328663184 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll @@ -609,9 +609,8 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) { ; IF-EVL-NEXT: [[TMP15:%.*]] = call @llvm.experimental.vp.splice.nxv2i64( [[VECTOR_RECUR]], [[TMP20]], i32 -1, splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP15]], ptr align 8 [[TMP9]], splat (i1 true), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP7]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll index a23933c7fb005..7c05f4613b575 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll @@ -153,8 +153,7 @@ define i32 @load_factor_4_with_gap(i64 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[TMP10]], splat (i1 true), i32 [[TMP4]]) ; IF-EVL-NEXT: [[TMP11:%.*]] = add [[TMP9]], [[WIDE_MASKED_GATHER2]] ; IF-EVL-NEXT: [[TMP12]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP11]], [[VEC_PHI]], i32 [[TMP4]]) -; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP4]] to i64 -; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] +; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -425,8 +424,7 @@ define i32 @load_factor_4_with_tail_gap(i64 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[TMP10]], splat (i1 true), i32 [[TMP4]]) ; IF-EVL-NEXT: [[TMP11:%.*]] = add [[TMP9]], [[WIDE_MASKED_GATHER2]] ; IF-EVL-NEXT: [[TMP12]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP11]], [[VEC_PHI]], i32 [[TMP4]]) -; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP4]] to i64 -; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] +; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -696,8 +694,7 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[TMP14]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = add [[TMP13]], [[WIDE_MASKED_GATHER5]] ; IF-EVL-NEXT: [[TMP16]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP15]], [[VEC_PHI]], i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] +; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index adfecdff8fcc8..9095d6e87ad4f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -285,9 +285,8 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[PREDPHI:%.*]] = select [[TMP10]], [[WIDE_MASKED_GATHER]], zeroinitializer ; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; SCALABLE-NEXT: call void @llvm.vp.store.nxv4i64.p0( [[PREDPHI]], ptr align 8 [[TMP12]], splat (i1 true), i32 [[TMP17]]) -; SCALABLE-NEXT: [[TMP15:%.*]] = zext i32 [[TMP17]] to i64 -; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP15]], [[INDEX]] -; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] +; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] +; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; SCALABLE-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -383,9 +382,8 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[PREDPHI:%.*]] = select [[TMP10]], [[WIDE_MASKED_GATHER]], zeroinitializer ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv4i64.p0( [[PREDPHI]], ptr align 8 [[TMP12]], splat (i1 true), i32 [[TMP7]]) -; TF-SCALABLE-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64 -; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP15]], [[INDEX]] -; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] +; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]] +; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; TF-SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; TF-SCALABLE-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -698,9 +696,8 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; SCALABLE-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[VEC_IND]], align 8 [[BROADCAST_SPLAT1]], splat (i1 true), i32 [[TMP7]]) ; SCALABLE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] ; SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr align 8 [[TMP16]], splat (i1 true), i32 [[TMP7]]) -; SCALABLE-NEXT: [[TMP11:%.*]] = zext i32 [[TMP7]] to i64 -; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[TMP10]] -; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] +; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[TMP10]] +; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -780,9 +777,8 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[VEC_IND]], align 8 [[BROADCAST_SPLAT]], splat (i1 true), i32 [[TMP9]]) ; TF-SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[BROADCAST_SPLAT3]], ptr align 8 [[TMP10]], splat (i1 true), i32 [[TMP9]]) -; TF-SCALABLE-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64 -; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]] -; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] +; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[INDEX]] +; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; TF-SCALABLE-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -843,9 +839,8 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT1]], align 8 [[BROADCAST_SPLAT2]], [[TMP10]], i32 [[TMP7]]) ; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[BROADCAST_SPLAT1]], ptr align 8 [[TMP12]], splat (i1 true), i32 [[TMP7]]) -; SCALABLE-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64 -; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP15]], [[INDEX]] -; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] +; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[INDEX]] +; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] ; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; SCALABLE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; SCALABLE-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] @@ -940,9 +935,8 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT1]], align 8 [[BROADCAST_SPLAT2]], [[TMP10]], i32 [[TMP9]]) ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[BROADCAST_SPLAT1]], ptr align 8 [[TMP12]], splat (i1 true), i32 [[TMP9]]) -; TF-SCALABLE-NEXT: [[TMP14:%.*]] = zext i32 [[TMP9]] to i64 -; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[INDEX]] -; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] +; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]] +; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; TF-SCALABLE-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll index bdef894794850..90f3df50153a2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll @@ -40,13 +40,10 @@ define void @smax_call_uniform(ptr %dst, i64 %x) { ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE6]] ; CHECK: [[PRED_UREM_CONTINUE6]]: ; CHECK-NEXT: [[TMP13:%.*]] = tail call i64 @llvm.smax.i64(i64 0, i64 0) -; CHECK-NEXT: [[P:%.*]] = select i1 [[C]], i64 1, i64 [[TMP13]] ; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[C]], i64 1, i64 [[TMP13]] -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[P]], 1 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[PREDPHI7]], 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[DST]], i64 [[ADD]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP17]] -; CHECK-NEXT: store i64 0, ptr [[GEP]], align 8 +; CHECK-NEXT: store i64 0, ptr [[TMP19]], align 8 ; CHECK-NEXT: store i64 0, ptr [[TMP19]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index f6e8de608645a..f77974f563116 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -23,11 +23,11 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: +; CHECK: [[_LR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH1:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L7]] = add i32 [[SUM_02]], [[L3]] @@ -63,11 +63,11 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP5]], [[TMP3]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: +; CHECK-INTERLEAVED: [[_LR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L7]] = add i32 [[SUM_02]], [[L3]] @@ -125,11 +125,11 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: +; CHECK: [[_LR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH1:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -185,11 +185,11 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP15]], [[TMP13]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: +; CHECK-INTERLEAVED: [[_LR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -251,11 +251,11 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: +; CHECK: [[_LR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH1:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L7:%.*]] = add i32 [[SUM_02]], [[L3]] @@ -296,11 +296,11 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP7]], [[TMP6]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: +; CHECK-INTERLEAVED: [[_LR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L7:%.*]] = add i32 [[SUM_02]], [[L3]] @@ -360,11 +360,11 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: +; CHECK: [[_LR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[PROD_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 1, %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH1:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-NEXT: [[PROD_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 1, %[[_LR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -420,11 +420,11 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = mul i32 [[TMP15]], [[TMP13]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: +; CHECK-INTERLEAVED: [[_LR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[PROD_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 1, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[PROD_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 1, %[[_LR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -491,11 +491,11 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: +; CHECK: [[_LR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH1:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -549,11 +549,11 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP10]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: +; CHECK-INTERLEAVED: [[_LR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -617,11 +617,11 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: +; CHECK: [[_LR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 19, %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH1:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 19, %[[_LR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -668,11 +668,11 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = mul i32 [[TMP11]], [[TMP9]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: +; CHECK-INTERLEAVED: [[_LR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 19, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 19, %[[_LR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1679,11 +1679,11 @@ for.end: define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: define i32 @reduction_sum_multiuse( ; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[_LR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[ENTRY]] ] +; CHECK: [[_LR_PH1:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1703,11 +1703,11 @@ define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocaptu ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_sum_multiuse( ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { -; CHECK-INTERLEAVED-NEXT: [[ENTRY:.*]]: +; CHECK-INTERLEAVED-NEXT: [[_LR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[ENTRY]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[ENTRY]] ] +; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1778,11 +1778,11 @@ define i32 @reduction_predicated(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: +; CHECK: [[_LR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH1:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1838,11 +1838,11 @@ define i32 @reduction_predicated(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP15]], [[TMP13]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: +; CHECK-INTERLEAVED: [[_LR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1907,11 +1907,11 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32 ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: +; CHECK: [[_LR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH1:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[_LR_PH]] ] ; CHECK-NEXT: [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255 ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[L2]], align 4 @@ -1957,11 +1957,11 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[BIN_RDX]]) ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = zext i8 [[TMP13]] to i32 ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: +; CHECK-INTERLEAVED: [[_LR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[_LR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255 ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i8, ptr [[L2]], align 4 @@ -2021,11 +2021,11 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32 ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: +; CHECK: [[_LR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH1:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[_LR_PH]] ] ; CHECK-NEXT: [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255 ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[L2]], align 4 @@ -2071,11 +2071,11 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> [[BIN_RDX]]) ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = zext i8 [[TMP13]] to i32 ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: +; CHECK-INTERLEAVED: [[_LR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[_LR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255 ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i8, ptr [[L2]], align 4 @@ -2388,14 +2388,14 @@ define float @reduction_fmuladd_blend(ptr %a, ptr %b, i64 %n, i1 %c) { ; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: br i1 [[C]], label %[[FOO:.*]], label %[[BAR:.*]] -; CHECK: [[FOO]]: +; CHECK-NEXT: br i1 [[C]], label %[[IF:.*]], label %[[ELSE:.*]] +; CHECK: [[IF]]: ; CHECK-NEXT: br label %[[LATCH]] -; CHECK: [[BAR]]: +; CHECK: [[ELSE]]: ; CHECK-NEXT: [[MULADD:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP9]], float [[TMP10]], float [[SUM]]) ; CHECK-NEXT: br label %[[LATCH]] ; CHECK: [[LATCH]]: -; CHECK-NEXT: [[SUM_NEXT]] = phi float [ [[SUM]], %[[FOO]] ], [ [[MULADD]], %[[BAR]] ] +; CHECK-NEXT: [[SUM_NEXT]] = phi float [ [[SUM]], %[[IF]] ], [ [[MULADD]], %[[ELSE]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP25:![0-9]+]] @@ -2675,8 +2675,7 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP13]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[VEC_PHI]], [[TMP7]] -; CHECK-NEXT: [[TMP18:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP18]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP13]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]]) ; CHECK-NEXT: [[TMP11]] = add i32 [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -2746,12 +2745,10 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP28]], <4 x i32> [[TMP30]], <4 x i32> zeroinitializer ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP12]]) ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add i32 [[VEC_PHI1]], [[TMP13]] -; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = zext <4 x i8> [[TMP7]] to <4 x i32> -; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP27]], <4 x i32> [[TMP31]], <4 x i32> zeroinitializer +; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP27]], <4 x i32> [[TMP29]], <4 x i32> zeroinitializer ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP15]]) ; CHECK-INTERLEAVED-NEXT: [[TMP17]] = add i32 [[TMP11]], [[TMP16]] -; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = zext <4 x i8> [[TMP8]] to <4 x i32> -; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP28]], <4 x i32> [[TMP22]], <4 x i32> zeroinitializer +; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP28]], <4 x i32> [[TMP30]], <4 x i32> zeroinitializer ; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP18]]) ; CHECK-INTERLEAVED-NEXT: [[TMP20]] = add i32 [[TMP14]], [[TMP19]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 From 2a643289114a5bba7160938a0f0c2acf38d122ec Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 25 Sep 2025 18:01:06 +0100 Subject: [PATCH 2/3] !fixup undo unrelated test changes --- .../LoopVectorize/reduction-inloop.ll | 174 +++++++++--------- 1 file changed, 88 insertions(+), 86 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index f77974f563116..3f5e158e154ea 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -23,11 +23,11 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[_LR_PH:.*]]: +; CHECK: [[SCALAR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH1:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK: [[_LR_PH:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L7]] = add i32 [[SUM_02]], [[L3]] @@ -63,11 +63,11 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP5]], [[TMP3]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*]]: +; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L7]] = add i32 [[SUM_02]], [[L3]] @@ -125,11 +125,11 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[_LR_PH:.*]]: +; CHECK: [[SCALAR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH1:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK: [[_LR_PH:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -185,11 +185,11 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP15]], [[TMP13]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*]]: +; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -251,11 +251,11 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[_LR_PH:.*]]: +; CHECK: [[SCALAR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH1:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK: [[_LR_PH:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L7:%.*]] = add i32 [[SUM_02]], [[L3]] @@ -296,11 +296,11 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP7]], [[TMP6]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*]]: +; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L7:%.*]] = add i32 [[SUM_02]], [[L3]] @@ -360,11 +360,11 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[_LR_PH:.*]]: +; CHECK: [[SCALAR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH1:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-NEXT: [[PROD_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 1, %[[_LR_PH]] ] +; CHECK: [[_LR_PH:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-NEXT: [[PROD_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 1, %[[SCALAR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -420,11 +420,11 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = mul i32 [[TMP15]], [[TMP13]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*]]: +; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[PROD_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 1, %[[_LR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[PROD_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 1, %[[SCALAR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -491,11 +491,11 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[_LR_PH:.*]]: +; CHECK: [[SCALAR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH1:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK: [[_LR_PH:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -549,11 +549,11 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP10]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*]]: +; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -617,11 +617,11 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[_LR_PH:.*]]: +; CHECK: [[SCALAR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH1:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 19, %[[_LR_PH]] ] +; CHECK: [[_LR_PH:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 19, %[[SCALAR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -668,11 +668,11 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = mul i32 [[TMP11]], [[TMP9]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*]]: +; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 19, %[[_LR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 19, %[[SCALAR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1679,11 +1679,11 @@ for.end: define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: define i32 @reduction_sum_multiuse( ; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { -; CHECK-NEXT: [[_LR_PH:.*]]: +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH1:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK: [[_LR_PH:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1703,11 +1703,11 @@ define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocaptu ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_sum_multiuse( ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { -; CHECK-INTERLEAVED-NEXT: [[_LR_PH:.*]]: +; CHECK-INTERLEAVED-NEXT: [[ENTRY:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[ENTRY]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[ENTRY]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1778,11 +1778,11 @@ define i32 @reduction_predicated(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[_LR_PH:.*]]: +; CHECK: [[SCALAR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH1:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK: [[_LR_PH:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1838,11 +1838,11 @@ define i32 @reduction_predicated(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP15]], [[TMP13]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*]]: +; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1907,11 +1907,11 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32 ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[_LR_PH:.*]]: +; CHECK: [[SCALAR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH1:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[_LR_PH]] ] +; CHECK: [[_LR_PH:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[SCALAR_PH]] ] ; CHECK-NEXT: [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255 ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[L2]], align 4 @@ -1957,11 +1957,11 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[BIN_RDX]]) ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = zext i8 [[TMP13]] to i32 ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*]]: +; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[_LR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[SCALAR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255 ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i8, ptr [[L2]], align 4 @@ -2021,11 +2021,11 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32 ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[_LR_PH:.*]]: +; CHECK: [[SCALAR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH1:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[_LR_PH]] ] +; CHECK: [[_LR_PH:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[SCALAR_PH]] ] ; CHECK-NEXT: [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255 ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[L2]], align 4 @@ -2071,11 +2071,11 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> [[BIN_RDX]]) ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = zext i8 [[TMP13]] to i32 ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*]]: +; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[_LR_PH]] ] +; CHECK-INTERLEAVED: [[_LR_PH:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[SCALAR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255 ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i8, ptr [[L2]], align 4 @@ -2388,14 +2388,14 @@ define float @reduction_fmuladd_blend(ptr %a, ptr %b, i64 %n, i1 %c) { ; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: br i1 [[C]], label %[[IF:.*]], label %[[ELSE:.*]] -; CHECK: [[IF]]: +; CHECK-NEXT: br i1 [[C]], label %[[FOO:.*]], label %[[BAR:.*]] +; CHECK: [[FOO]]: ; CHECK-NEXT: br label %[[LATCH]] -; CHECK: [[ELSE]]: +; CHECK: [[BAR]]: ; CHECK-NEXT: [[MULADD:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP9]], float [[TMP10]], float [[SUM]]) ; CHECK-NEXT: br label %[[LATCH]] ; CHECK: [[LATCH]]: -; CHECK-NEXT: [[SUM_NEXT]] = phi float [ [[SUM]], %[[IF]] ], [ [[MULADD]], %[[ELSE]] ] +; CHECK-NEXT: [[SUM_NEXT]] = phi float [ [[SUM]], %[[FOO]] ], [ [[MULADD]], %[[BAR]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP25:![0-9]+]] @@ -2745,10 +2745,12 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP28]], <4 x i32> [[TMP30]], <4 x i32> zeroinitializer ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP12]]) ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add i32 [[VEC_PHI1]], [[TMP13]] -; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP27]], <4 x i32> [[TMP29]], <4 x i32> zeroinitializer +; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = zext <4 x i8> [[TMP7]] to <4 x i32> +; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP27]], <4 x i32> [[TMP31]], <4 x i32> zeroinitializer ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP15]]) ; CHECK-INTERLEAVED-NEXT: [[TMP17]] = add i32 [[TMP11]], [[TMP16]] -; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP28]], <4 x i32> [[TMP30]], <4 x i32> zeroinitializer +; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = zext <4 x i8> [[TMP8]] to <4 x i32> +; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP28]], <4 x i32> [[TMP22]], <4 x i32> zeroinitializer ; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP18]]) ; CHECK-INTERLEAVED-NEXT: [[TMP20]] = add i32 [[TMP14]], [[TMP19]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 From 24ce7708edbbc00692a0d90693ad48db8dd30195 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 26 Sep 2025 10:02:09 +0100 Subject: [PATCH 3/3] !fxup update checks for additional run-line --- llvm/test/Transforms/LoopVectorize/reduction-inloop.ll | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index 3f5e158e154ea..f4d4cca0d4220 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -2745,12 +2745,10 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP28]], <4 x i32> [[TMP30]], <4 x i32> zeroinitializer ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP12]]) ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add i32 [[VEC_PHI1]], [[TMP13]] -; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = zext <4 x i8> [[TMP7]] to <4 x i32> -; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP27]], <4 x i32> [[TMP31]], <4 x i32> zeroinitializer +; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP27]], <4 x i32> [[TMP29]], <4 x i32> zeroinitializer ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP15]]) ; CHECK-INTERLEAVED-NEXT: [[TMP17]] = add i32 [[TMP11]], [[TMP16]] -; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = zext <4 x i8> [[TMP8]] to <4 x i32> -; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP28]], <4 x i32> [[TMP22]], <4 x i32> zeroinitializer +; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP28]], <4 x i32> [[TMP30]], <4 x i32> zeroinitializer ; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP18]]) ; CHECK-INTERLEAVED-NEXT: [[TMP20]] = add i32 [[TMP14]], [[TMP19]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8