diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 10b2f5df2e23e..a5db4efcb0940 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1640,14 +1640,15 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
     // Try to simplify the branch condition if TC <= VF * UF when the latch
     // terminator is BranchOnCount or BranchOnCond where the input is
     // Not(ActiveLaneMask).
-    const SCEV *TripCount =
-        vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
-    assert(!isa<SCEVCouldNotCompute>(TripCount) &&
+    const SCEV *VectorTripCount =
+        vputils::getSCEVExprForVPValue(&Plan.getVectorTripCount(), SE);
+    if (isa<SCEVCouldNotCompute>(VectorTripCount))
+      VectorTripCount = vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
+    assert(!isa<SCEVCouldNotCompute>(VectorTripCount) &&
            "Trip count SCEV must be computable");
     ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF);
-    const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
-    if (TripCount->isZero() ||
-        !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
+    const SCEV *C = SE.getElementCount(VectorTripCount->getType(), NumElements);
+    if (!SE.isKnownPredicate(CmpInst::ICMP_ULE, VectorTripCount, C))
       return false;
   } else if (match(Term, m_BranchOnCond(m_VPValue(Cond)))) {
     // For BranchOnCond, check if we can prove the condition to be true using VF
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
index d081487cadc5c..7c4e8afe184d6 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
@@ -351,8 +351,7 @@ define i8 @mul_non_pow_2_low_trip_count(ptr noalias %a) {
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
 ; CHECK-NEXT: [[TMP1]] = mul <8 x i8> [[WIDE_LOAD]], [[VEC_PHI]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8
-; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> [[TMP1]])
 ; CHECK-NEXT: br label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll
index 1a385b6a25481..be63d68425957 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll
@@ -12,14 +12,9 @@ define void @test_tc_17_no_epilogue_vectorization(ptr noalias %src, ptr noalias
 ; CHECK: vector.ph:
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 64
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 64
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1:%.*]], align 64
+; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3:%.*]], align 64
+; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -27,13 +22,13 @@ define void @test_tc_17_no_epilogue_vectorization(ptr noalias %src, ptr noalias
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
 ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[LDADDR:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I]]
+; CHECK-NEXT: [[LDADDR:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[I]]
 ; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[LDADDR]], align 64
-; CHECK-NEXT: [[STADDR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I]]
+; CHECK-NEXT: [[STADDR:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 [[I]]
 ; CHECK-NEXT: store i8 [[VAL]], ptr [[STADDR]], align 64
 ; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1
 ; CHECK-NEXT: [[IS_NEXT:%.*]] = icmp ult i64 [[I_NEXT]], 17
-; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;
@@ -70,7 +65,7 @@ define void @test_tc_18(ptr noalias %src, ptr noalias %dst) {
 ; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 64
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK: vec.epilog.iter.check:
@@ -86,7 +81,7 @@ define void @test_tc_18(ptr noalias %src, ptr noalias %dst) {
 ; CHECK-NEXT: store <2 x i8> [[WIDE_LOAD2]], ptr [[TMP9]], align 64
 ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 2
 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 18
-; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: vec.epilog.middle.block:
 ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK: vec.epilog.scalar.ph:
@@ -100,7 +95,7 @@ define void @test_tc_18(ptr noalias %src, ptr noalias %dst) {
 ; CHECK-NEXT: store i8 [[VAL]], ptr [[STADDR]], align 64
 ; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1
 ; CHECK-NEXT: [[IS_NEXT:%.*]] = icmp ult i64 [[I_NEXT]], 18
-; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;
@@ -137,7 +132,7 @@ define void @test_tc_19(ptr noalias %src, ptr noalias %dst) {
 ; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 64
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK: vec.epilog.iter.check:
@@ -153,7 +148,7 @@ define void @test_tc_19(ptr noalias %src, ptr noalias %dst) {
 ; CHECK-NEXT: store <2 x i8> [[WIDE_LOAD2]], ptr [[TMP9]], align 64
 ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 2
 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 18
-; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK: vec.epilog.middle.block:
 ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK: vec.epilog.scalar.ph:
@@ -167,7 +162,7 @@ define void @test_tc_19(ptr noalias %src, ptr noalias %dst) {
 ; CHECK-NEXT: store i8 [[VAL]], ptr [[STADDR]], align 64
 ; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1
 ; CHECK-NEXT: [[IS_NEXT:%.*]] = icmp ult i64 [[I_NEXT]], 19
-; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;
@@ -216,7 +211,7 @@ define void @test_tc_20(ptr noalias %src, ptr noalias %dst) {
 ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD3]], ptr [[TMP10]], align 64
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK: vec.epilog.iter.check:
@@ -232,7 +227,7 @@ define void @test_tc_20(ptr noalias %src, ptr noalias %dst) {
 ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD5]], ptr [[TMP15]], align 64
 ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4
 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT6]], 20
-; CHECK-NEXT: br i1 [[TMP17]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP17]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK: vec.epilog.middle.block:
 ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK: vec.epilog.scalar.ph:
@@ -246,7 +241,7 @@ define void @test_tc_20(ptr noalias %src, ptr noalias %dst) {
 ; CHECK-NEXT: store i8 [[VAL]], ptr [[STADDR]], align 64
 ; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1
 ; CHECK-NEXT: [[IS_NEXT:%.*]] = icmp ult i64 [[I_NEXT]], 20
-; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;
@@ -282,7 +277,7 @@ define void @limit_main_loop_vf_to_avoid_dead_main_vector_loop(ptr noalias %src,
 ; CHECK-NEXT: store <8 x i8> [[STRIDED_VEC]], ptr [[TMP3]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
-; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -296,7 +291,7 @@ define void @limit_main_loop_vf_to_avoid_dead_main_vector_loop(ptr noalias %src,
 ; CHECK-NEXT: store i8 [[L]], ptr [[GEP_DST]], align 1
 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 32
-; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
index 452868ddd59c1..5026fe8c27836 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
@@ -2811,8 +2811,7 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) {
 ; CHECK-NEXT: [[TMP114]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI5]]
 ; CHECK-NEXT: [[TMP115]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI6]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT: [[TMP116:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT: br i1 [[TMP116]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
+; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP113]], [[TMP112]]
 ; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP114]], [[BIN_RDX]]
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
index 27fa57928aa96..ed26ab5b33584 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 --check-globals none
 ; RUN: opt -passes='loop-vectorize,verify' -force-vector-width=8 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF8UF1 %s
 ; RUN: opt -passes='loop-vectorize,verify' -force-vector-width=8 -force-vector-interleave=2 -S %s | FileCheck --check-prefixes=VF8UF2 %s
 ; RUN: opt -passes='loop-vectorize,verify' -force-vector-width=16 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF16UF1 %s
@@ -1238,18 +1238,15 @@ define void @test_vector_tc_eq_16(ptr %A) {
 ; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
 ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
 ; VF8UF2: [[VECTOR_BODY]]:
-; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VF8UF2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8
-; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[NEXT_GEP]], align 1
+; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i32 8
+; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
 ; VF8UF2-NEXT: [[TMP2:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
 ; VF8UF2-NEXT: [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
-; VF8UF2-NEXT: store <8 x i8> [[TMP2]], ptr [[NEXT_GEP]], align 1
-; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[TMP1]], align 1
-; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; VF8UF2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; VF8UF2-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VF8UF2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i32 8
+; VF8UF2-NEXT: store <8 x i8> [[TMP2]], ptr [[A]], align 1
+; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[TMP4]], align 1
+; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
 ; VF8UF2: [[MIDDLE_BLOCK]]:
 ; VF8UF2-NEXT: br label %[[SCALAR_PH]]
 ; VF8UF2: [[SCALAR_PH]]:
@@ -1265,7 +1262,7 @@ define void @test_vector_tc_eq_16(ptr %A) {
 ; VF8UF2-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
 ; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
 ; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
-; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
 ; VF8UF2: [[EXIT]]:
 ; VF8UF2-NEXT: ret void
 ;
@@ -1277,14 +1274,10 @@ define void @test_vector_tc_eq_16(ptr %A) {
 ; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
 ; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
 ; VF16UF1: [[VECTOR_BODY]]:
-; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VF16UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
+; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[A]], align 1
 ; VF16UF1-NEXT: [[TMP1:%.*]] = add nsw <16 x i8> [[WIDE_LOAD]], splat (i8 10)
-; VF16UF1-NEXT: store <16 x i8> [[TMP1]], ptr [[NEXT_GEP]], align 1
-; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; VF16UF1-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; VF16UF1-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VF16UF1-NEXT: store <16 x i8> [[TMP1]], ptr [[A]], align 1
+; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]]
 ; VF16UF1: [[MIDDLE_BLOCK]]:
 ; VF16UF1-NEXT: br label %[[SCALAR_PH]]
 ; VF16UF1: [[SCALAR_PH]]:
@@ -1300,7 +1293,7 @@ define void @test_vector_tc_eq_16(ptr %A) {
 ; VF16UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
 ; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
 ; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
-; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
 ; VF16UF1: [[EXIT]]:
 ; VF16UF1-NEXT: ret void
 ;
@@ -1321,33 +1314,3 @@ loop:
 exit:
   ret void
 }
-;.
-; VF8UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; VF8UF1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; VF8UF1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; VF8UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-; VF8UF1: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
-; VF8UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
-; VF8UF1: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
-; VF8UF1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
-; VF8UF1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
-; VF8UF1: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
-;.
-; VF8UF2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; VF8UF2: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
-; VF8UF2: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
-; VF8UF2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
-; VF8UF2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; VF8UF2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
-; VF8UF2: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
-; VF8UF2: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
-;.
-; VF16UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; VF16UF1: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
-; VF16UF1: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
-; VF16UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
-; VF16UF1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; VF16UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
-; VF16UF1: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
-; VF16UF1: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
-;.