diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3b16248f962bc..e14f985efd96a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7683,6 +7683,8 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck( BranchInst &BI = *BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters); if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) { + // FIXME: See test Transforms/LoopVectorize/branch-weights.ll. MainLoopStep + // looks wrong: it seems to use the epilogue VF, not the main loop VF. unsigned MainLoopStep = UF * VF.getKnownMinValue(); unsigned EpilogueLoopStep = EPI.EpilogueUF * EPI.EpilogueVF.getKnownMinValue(); diff --git a/llvm/test/Transforms/LoopVectorize/branch-weights.ll b/llvm/test/Transforms/LoopVectorize/branch-weights.ll index e11f77d8aeaec..6892709f085f7 100644 --- a/llvm/test/Transforms/LoopVectorize/branch-weights.ll +++ b/llvm/test/Transforms/LoopVectorize/branch-weights.ll @@ -1,53 +1,103 @@ -; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=4 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "br " --filter "^.*:" --filter "icmp" --version 5 +; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-epilogue-vectorization \ +; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC1_EPI4 +; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-epilogue-vectorization \ +; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC2_EPI4 -; CHECK-LABEL: @f0( -; -; CHECK: entry: -; CHECK: br i1 %cmp.entry, label %iter.check, label %exit, !prof [[PROF_F0_ENTRY:![0-9]+]] -; -; CHECK: iter.check: -; CHECK: br i1 %min.iters.check, label 
%vec.epilog.scalar.ph, label %vector.scevcheck, !prof [[PROF_F0_UNLIKELY:![0-9]+]] -; -; CHECK: vector.scevcheck: -; CHECK: br i1 %4, label %vec.epilog.scalar.ph, label %vector.main.loop.iter.check, !prof [[PROF_F0_UNLIKELY]] -; -; CHECK: vector.main.loop.iter.check: -; CHECK: br i1 %min.iters.check1, label %vec.epilog.ph, label %vector.ph, !prof [[PROF_F0_UNLIKELY]] -; -; CHECK: vector.ph: -; CHECK: br label %vector.body -; -; CHECK: vector.body: -; CHECK: br i1 {{.+}}, label %middle.block, label %vector.body, !prof [[PROF_F0_VECTOR_BODY:![0-9]+]] -; -; CHECK: middle.block: -; CHECK: br i1 %cmp.n, label %exit.loopexit, label %vec.epilog.iter.check, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]] -; -; CHECK: vec.epilog.iter.check: -; CHECK: br i1 %min.epilog.iters.check, label %vec.epilog.scalar.ph, label %vec.epilog.ph, !prof [[PROF_F0_VEC_EPILOGUE_SKIP:![0-9]+]] -; -; CHECK: vec.epilog.ph: -; CHECK: br label %vec.epilog.vector.body -; -; CHECK: vec.epilog.vector.body: -; CHECK: br i1 {{.+}}, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !prof [[PROF_F0_VEC_EPILOG_VECTOR_BODY:![0-9]+]] -; -; CHECK: vec.epilog.middle.block: -; CHECK: br i1 %cmp.n{{.+}}, label %exit.loopexit, label %vec.epilog.scalar.ph, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]] -; -; CHECK: vec.epilog.scalar.ph: -; CHECK: br label %loop -; -; CHECK: loop: -; CHECK: br i1 %cmp.loop, label %loop, label %exit.loopexit, !prof [[PROF_F0_LOOP:![0-9]+]] +; FIXME: For MAINVF4IC2_EPI4 the branch weights in the terminator of +; the VEC_EPILOG_ITER_CHECK block should be [4,4] since we process 8 +; scalar iterations in the main loop, leaving the remaining count to +; be in the range [0,7]. That gives a 4:4 chance of skipping the +; vector epilogue. I believe the problem lies in +; EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck +; where the main loop VF is set to the same value as the epilogue VF. 
+define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 { +; MAINVF4IC1_EPI4-LABEL: define void @f0( +; MAINVF4IC1_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] { +; MAINVF4IC1_EPI4: [[ENTRY:.*:]] +; MAINVF4IC1_EPI4: [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0 +; MAINVF4IC1_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]] +; MAINVF4IC1_EPI4: [[ITER_CHECK]]: +; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4 +; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]] +; MAINVF4IC1_EPI4: [[VECTOR_SCEVCHECK]]: +; MAINVF4IC1_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0 +; MAINVF4IC1_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255 +; MAINVF4IC1_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]] +; MAINVF4IC1_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 4 +; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]] +; MAINVF4IC1_EPI4: [[VECTOR_PH]]: +; MAINVF4IC1_EPI4: br label %[[VECTOR_BODY:.*]] +; MAINVF4IC1_EPI4: [[VECTOR_BODY]]: +; MAINVF4IC1_EPI4: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]] +; MAINVF4IC1_EPI4: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]] +; MAINVF4IC1_EPI4: [[MIDDLE_BLOCK]]: +; MAINVF4IC1_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] +; MAINVF4IC1_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]] +; MAINVF4IC1_EPI4: [[VEC_EPILOG_ITER_CHECK]]: +; MAINVF4IC1_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4 +; MAINVF4IC1_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], 
label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]] +; MAINVF4IC1_EPI4: [[VEC_EPILOG_PH]]: +; MAINVF4IC1_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; MAINVF4IC1_EPI4: [[VEC_EPILOG_VECTOR_BODY]]: +; MAINVF4IC1_EPI4: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]] +; MAINVF4IC1_EPI4: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] +; MAINVF4IC1_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: +; MAINVF4IC1_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]] +; MAINVF4IC1_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7]] +; MAINVF4IC1_EPI4: [[VEC_EPILOG_SCALAR_PH]]: +; MAINVF4IC1_EPI4: br label %[[LOOP:.*]] +; MAINVF4IC1_EPI4: [[LOOP]]: +; MAINVF4IC1_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]] +; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]] +; MAINVF4IC1_EPI4: [[EXIT_LOOPEXIT]]: +; MAINVF4IC1_EPI4: br label %[[EXIT]] +; MAINVF4IC1_EPI4: [[EXIT]]: ; -; CHECK: exit.loopexit: -; CHECK: br label %exit +; MAINVF4IC2_EPI4-LABEL: define void @f0( +; MAINVF4IC2_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] { +; MAINVF4IC2_EPI4: [[ENTRY:.*:]] +; MAINVF4IC2_EPI4: [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0 +; MAINVF4IC2_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]] +; MAINVF4IC2_EPI4: [[ITER_CHECK]]: +; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4 +; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]] +; MAINVF4IC2_EPI4: [[VECTOR_SCEVCHECK]]: +; MAINVF4IC2_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0 +; MAINVF4IC2_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255 +; MAINVF4IC2_EPI4: br i1 [[TMP4:%.*]], label 
%[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]] +; MAINVF4IC2_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 8 +; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]] +; MAINVF4IC2_EPI4: [[VECTOR_PH]]: +; MAINVF4IC2_EPI4: br label %[[VECTOR_BODY:.*]] +; MAINVF4IC2_EPI4: [[VECTOR_BODY]]: +; MAINVF4IC2_EPI4: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]] +; MAINVF4IC2_EPI4: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]] +; MAINVF4IC2_EPI4: [[MIDDLE_BLOCK]]: +; MAINVF4IC2_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] +; MAINVF4IC2_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]] +; MAINVF4IC2_EPI4: [[VEC_EPILOG_ITER_CHECK]]: +; MAINVF4IC2_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4 +; MAINVF4IC2_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]] +; MAINVF4IC2_EPI4: [[VEC_EPILOG_PH]]: +; MAINVF4IC2_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; MAINVF4IC2_EPI4: [[VEC_EPILOG_VECTOR_BODY]]: +; MAINVF4IC2_EPI4: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]] +; MAINVF4IC2_EPI4: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] +; MAINVF4IC2_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: +; MAINVF4IC2_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]] +; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF11:![0-9]+]] +; MAINVF4IC2_EPI4: [[VEC_EPILOG_SCALAR_PH]]: +; MAINVF4IC2_EPI4: br label %[[LOOP:.*]] +; MAINVF4IC2_EPI4: [[LOOP]]: +; MAINVF4IC2_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 
[[I32:%.*]], [[LEN]] +; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]] +; MAINVF4IC2_EPI4: [[EXIT_LOOPEXIT]]: +; MAINVF4IC2_EPI4: br label %[[EXIT]] +; MAINVF4IC2_EPI4: [[EXIT]]: ; -; CHECK: exit: -; CHECK: ret void - -define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 { entry: %cmp.entry = icmp sgt i32 %len, 0 br i1 %cmp.entry, label %loop, label %exit, !prof !1 @@ -72,11 +122,33 @@ exit: !0 = !{!"function_entry_count", i64 13} !1 = !{!"branch_weights", i32 12, i32 1} !2 = !{!"branch_weights", i32 1234, i32 1} - -; CHECK: [[PROF_F0_ENTRY]] = !{!"branch_weights", i32 12, i32 1} -; CHECK: [[PROF_F0_UNLIKELY]] = !{!"branch_weights", i32 1, i32 127} -; CHECK: [[PROF_F0_VECTOR_BODY]] = !{!"branch_weights", i32 1, i32 307} -; CHECK: [[PROF_F0_MIDDLE_BLOCKS]] = !{!"branch_weights", i32 1, i32 3} -; CHECK: [[PROF_F0_VEC_EPILOGUE_SKIP]] = !{!"branch_weights", i32 4, i32 0} -; CHECK: [[PROF_F0_VEC_EPILOG_VECTOR_BODY]] = !{!"branch_weights", i32 0, i32 0} -; CHECK: [[PROF_F0_LOOP]] = !{!"branch_weights", i32 2, i32 1} +;. 
+; MAINVF4IC1_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13} +; MAINVF4IC1_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1} +; MAINVF4IC1_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127} +; MAINVF4IC1_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 307} +; MAINVF4IC1_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]} +; MAINVF4IC1_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1} +; MAINVF4IC1_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"} +; MAINVF4IC1_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 3} +; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0} +; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0} +; MAINVF4IC1_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]} +; MAINVF4IC1_EPI4: [[PROF11]] = !{!"branch_weights", i32 2, i32 1} +; MAINVF4IC1_EPI4: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]} +;. +; MAINVF4IC2_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13} +; MAINVF4IC2_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1} +; MAINVF4IC2_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127} +; MAINVF4IC2_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 153} +; MAINVF4IC2_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]} +; MAINVF4IC2_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1} +; MAINVF4IC2_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"} +; MAINVF4IC2_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 7} +; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0} +; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0} +; MAINVF4IC2_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]} +; MAINVF4IC2_EPI4: [[PROF11]] = !{!"branch_weights", i32 1, i32 3} +; MAINVF4IC2_EPI4: [[PROF12]] = !{!"branch_weights", i32 2, i32 1} +; MAINVF4IC2_EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]]} +;.