diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp index a58ab093a1f75..55079b4a42d2f 100644 --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -967,6 +967,9 @@ bool LoopPredication::isLoopProfitableToPredicate() { Numerator += Weight; Denominator += Weight; } + // If all weights are zero act as if there was no profile data + if (Denominator == 0) + return BranchProbability::getBranchProbability(1, NumSucc); return BranchProbability::getBranchProbability(Numerator, Denominator); } else { assert(LatchBlock != ExitingBlock && diff --git a/llvm/test/Transforms/LoopPredication/pr66382.ll b/llvm/test/Transforms/LoopPredication/pr66382.ll index 3ac4cac0615f4..f9a14d470453c 100644 --- a/llvm/test/Transforms/LoopPredication/pr66382.ll +++ b/llvm/test/Transforms/LoopPredication/pr66382.ll @@ -1,4 +1,4 @@ -; XFAIL: * +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -S -loop-predication-skip-profitability-checks=false -passes='require<scalar-evolution>,loop-mssa(loop-predication)' %s | FileCheck %s target triple = "x86_64-unknown-linux-gnu" @@ -6,7 +6,26 @@ target triple = "x86_64-unknown-linux-gnu" ; Function Attrs: nocallback nofree nosync willreturn declare void @llvm.experimental.guard(i1, ...) #0 +; Check that LoopPredication doesn't crash on all-zero branch weights define void @foo() { +; CHECK-LABEL: define void @foo() { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: Header: +; CHECK-NEXT: [[J2:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[J_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 false, i32 0) [ "deopt"() ] +; CHECK-NEXT: [[J_NEXT]] = add i64 [[J2]], 1 +; CHECK-NEXT: br i1 false, label [[LATCH]], label [[EXIT:%.*]] +; CHECK: Latch: +; CHECK-NEXT: [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J2]], 0 +; CHECK-NEXT: br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[COMMON_RET_LOOPEXIT:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: common.ret.loopexit: +; CHECK-NEXT: br label [[COMMON_RET:%.*]] +; CHECK: common.ret: +; CHECK-NEXT: ret void +; CHECK: exit: +; CHECK-NEXT: br label [[COMMON_RET]] +; entry: br label %Header diff --git a/llvm/test/Transforms/LoopPredication/scale.ll b/llvm/test/Transforms/LoopPredication/scale.ll new file mode 100644 index 0000000000000..29e48cf579598 --- /dev/null +++ b/llvm/test/Transforms/LoopPredication/scale.ll @@ -0,0 +1,259 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -loop-predication-skip-profitability-checks=false -passes='require<scalar-evolution>,loop-mssa(loop-predication)' -verify-memoryssa -loop-predication-latch-probability-scale=2 %s 2>&1 | FileCheck %s --check-prefixes=CHECK-PROF +; RUN: opt -S -loop-predication-skip-profitability-checks=false -passes='require<scalar-evolution>,loop-mssa(loop-predication)' -verify-memoryssa -loop-predication-latch-probability-scale=1.9 %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NOTPROF + +; LatchExitProbability: 0x20000000 / 0x80000000 = 25.00% +; ExitingBlockProbability: 0x40000000 / 0x80000000 = 50.00% +; Predicate is profitable when the scale factor is 2 and not profitable if it's less than 2. 
+define i64 @predicate_eq_ones(ptr nocapture readonly %arg, i32 %length, ptr nocapture readonly %arg2, ptr nocapture readonly %n_addr, i64 %i) !prof !21 { +; CHECK-PROF-LABEL: define i64 @predicate_eq_ones( +; CHECK-PROF-SAME: ptr nocapture readonly [[ARG:%.*]], i32 [[LENGTH:%.*]], ptr nocapture readonly [[ARG2:%.*]], ptr nocapture readonly [[N_ADDR:%.*]], i64 [[I:%.*]]) !prof [[PROF0:![0-9]+]] { +; CHECK-PROF-NEXT: entry: +; CHECK-PROF-NEXT: [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-PROF-NEXT: [[N_PRE:%.*]] = load i64, ptr [[N_ADDR]], align 4 +; CHECK-PROF-NEXT: [[TMP0:%.*]] = icmp ule i64 1048576, [[LENGTH_EXT]] +; CHECK-PROF-NEXT: [[TMP1:%.*]] = icmp ult i64 0, [[LENGTH_EXT]] +; CHECK-PROF-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]] +; CHECK-PROF-NEXT: [[TMP3:%.*]] = freeze i1 [[TMP2]] +; CHECK-PROF-NEXT: br label [[HEADER:%.*]] +; CHECK-PROF: Header: +; CHECK-PROF-NEXT: [[RESULT_IN3:%.*]] = phi ptr [ [[ARG2]], [[ENTRY:%.*]] ], [ [[ARG]], [[LATCH:%.*]] ] +; CHECK-PROF-NEXT: [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ] +; CHECK-PROF-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]] +; CHECK-PROF-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ] +; CHECK-PROF-NEXT: call void @llvm.assume(i1 [[WITHIN_BOUNDS]]) +; CHECK-PROF-NEXT: [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]] +; CHECK-PROF-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J2]], 1 +; CHECK-PROF-NEXT: br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK-PROF: Latch: +; CHECK-PROF-NEXT: [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576 +; CHECK-PROF-NEXT: br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof [[PROF2:![0-9]+]] +; CHECK-PROF: exitLatch: +; CHECK-PROF-NEXT: ret i64 1 +; CHECK-PROF: exit: +; CHECK-PROF-NEXT: [[RESULT_IN3_LCSSA:%.*]] = phi ptr [ [[RESULT_IN3]], [[HEADER]] ] +; CHECK-PROF-NEXT: [[RESULT_LE:%.*]] = load i64, ptr [[RESULT_IN3_LCSSA]], align 8 +; CHECK-PROF-NEXT: ret i64 [[RESULT_LE]] +; +; CHECK-NOTPROF-LABEL: define i64 @predicate_eq_ones( +; CHECK-NOTPROF-SAME: ptr nocapture readonly [[ARG:%.*]], i32 [[LENGTH:%.*]], ptr nocapture readonly [[ARG2:%.*]], ptr nocapture readonly [[N_ADDR:%.*]], i64 [[I:%.*]]) !prof [[PROF0:![0-9]+]] { +; CHECK-NOTPROF-NEXT: entry: +; CHECK-NOTPROF-NEXT: [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-NOTPROF-NEXT: [[N_PRE:%.*]] = load i64, ptr [[N_ADDR]], align 4 +; CHECK-NOTPROF-NEXT: br label [[HEADER:%.*]] +; CHECK-NOTPROF: Header: +; CHECK-NOTPROF-NEXT: [[RESULT_IN3:%.*]] = phi ptr [ [[ARG2]], [[ENTRY:%.*]] ], [ [[ARG]], [[LATCH:%.*]] ] +; CHECK-NOTPROF-NEXT: [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ] +; CHECK-NOTPROF-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]] +; CHECK-NOTPROF-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ] +; CHECK-NOTPROF-NEXT: [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]] +; CHECK-NOTPROF-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J2]], 1 +; CHECK-NOTPROF-NEXT: br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK-NOTPROF: Latch: +; CHECK-NOTPROF-NEXT: [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576 +; CHECK-NOTPROF-NEXT: br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof [[PROF2:![0-9]+]] +; CHECK-NOTPROF: exitLatch: +; CHECK-NOTPROF-NEXT: ret i64 1 +; CHECK-NOTPROF: exit: +; CHECK-NOTPROF-NEXT: [[RESULT_IN3_LCSSA:%.*]] = phi ptr [ [[RESULT_IN3]], [[HEADER]] ] +; CHECK-NOTPROF-NEXT: [[RESULT_LE:%.*]] = load i64, ptr [[RESULT_IN3_LCSSA]], align 8 +; CHECK-NOTPROF-NEXT: ret i64 [[RESULT_LE]] +; +entry: + %length.ext = zext i32 %length to i64 + %n.pre = load i64, ptr %n_addr, align 4 + br label %Header + +Header: ; preds = %entry, %Latch + %result.in3 = phi ptr [ %arg2, %entry ], [ %arg, %Latch ] + %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ] + %within.bounds = icmp ult i64 %j2, %length.ext + call void (i1, ...) 
@llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + %innercmp = icmp eq i64 %j2, %n.pre + %j.next = add nuw nsw i64 %j2, 1 + br i1 %innercmp, label %Latch, label %exit, !prof !0 + +Latch: ; preds = %Header + %speculate_trip_count = icmp ult i64 %j.next, 1048576 + br i1 %speculate_trip_count, label %Header, label %exitLatch, !prof !2 + +exitLatch: ; preds = %Latch + ret i64 1 + +exit: ; preds = %Header + %result.in3.lcssa = phi ptr [ %result.in3, %Header ] + %result.le = load i64, ptr %result.in3.lcssa, align 8 + ret i64 %result.le +} +!0 = !{!"branch_weights", i32 1, i32 1} + +; Same as the previous one, but with zero weights (should be treated as if no profile - equal probability) +define i64 @predicate_eq_zeroes(ptr nocapture readonly %arg, i32 %length, ptr nocapture readonly %arg2, ptr nocapture readonly %n_addr, i64 %i) !prof !21 { +; CHECK-PROF-LABEL: define i64 @predicate_eq_zeroes( +; CHECK-PROF-SAME: ptr nocapture readonly [[ARG:%.*]], i32 [[LENGTH:%.*]], ptr nocapture readonly [[ARG2:%.*]], ptr nocapture readonly [[N_ADDR:%.*]], i64 [[I:%.*]]) !prof [[PROF0]] { +; CHECK-PROF-NEXT: entry: +; CHECK-PROF-NEXT: [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-PROF-NEXT: [[N_PRE:%.*]] = load i64, ptr [[N_ADDR]], align 4 +; CHECK-PROF-NEXT: [[TMP0:%.*]] = icmp ule i64 1048576, [[LENGTH_EXT]] +; CHECK-PROF-NEXT: [[TMP1:%.*]] = icmp ult i64 0, [[LENGTH_EXT]] +; CHECK-PROF-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]] +; CHECK-PROF-NEXT: [[TMP3:%.*]] = freeze i1 [[TMP2]] +; CHECK-PROF-NEXT: br label [[HEADER:%.*]] +; CHECK-PROF: Header: +; CHECK-PROF-NEXT: [[RESULT_IN3:%.*]] = phi ptr [ [[ARG2]], [[ENTRY:%.*]] ], [ [[ARG]], [[LATCH:%.*]] ] +; CHECK-PROF-NEXT: [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ] +; CHECK-PROF-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]] +; CHECK-PROF-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ] +; CHECK-PROF-NEXT: call void @llvm.assume(i1 [[WITHIN_BOUNDS]]) +; CHECK-PROF-NEXT: [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]] +; CHECK-PROF-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J2]], 1 +; CHECK-PROF-NEXT: br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof [[PROF3:![0-9]+]] +; CHECK-PROF: Latch: +; CHECK-PROF-NEXT: [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576 +; CHECK-PROF-NEXT: br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof [[PROF2]] +; CHECK-PROF: exitLatch: +; CHECK-PROF-NEXT: ret i64 1 +; CHECK-PROF: exit: +; CHECK-PROF-NEXT: [[RESULT_IN3_LCSSA:%.*]] = phi ptr [ [[RESULT_IN3]], [[HEADER]] ] +; CHECK-PROF-NEXT: [[RESULT_LE:%.*]] = load i64, ptr [[RESULT_IN3_LCSSA]], align 8 +; CHECK-PROF-NEXT: ret i64 [[RESULT_LE]] +; +; CHECK-NOTPROF-LABEL: define i64 @predicate_eq_zeroes( +; CHECK-NOTPROF-SAME: ptr nocapture readonly [[ARG:%.*]], i32 [[LENGTH:%.*]], ptr nocapture readonly [[ARG2:%.*]], ptr nocapture readonly [[N_ADDR:%.*]], i64 [[I:%.*]]) !prof [[PROF0]] { +; CHECK-NOTPROF-NEXT: entry: +; CHECK-NOTPROF-NEXT: [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-NOTPROF-NEXT: [[N_PRE:%.*]] = load i64, ptr [[N_ADDR]], align 4 +; CHECK-NOTPROF-NEXT: br label [[HEADER:%.*]] +; CHECK-NOTPROF: Header: +; CHECK-NOTPROF-NEXT: [[RESULT_IN3:%.*]] = phi ptr [ [[ARG2]], [[ENTRY:%.*]] ], [ [[ARG]], [[LATCH:%.*]] ] +; CHECK-NOTPROF-NEXT: [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ] +; CHECK-NOTPROF-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]] +; CHECK-NOTPROF-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ] +; CHECK-NOTPROF-NEXT: [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]] +; CHECK-NOTPROF-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J2]], 1 +; CHECK-NOTPROF-NEXT: br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof [[PROF3:![0-9]+]] +; CHECK-NOTPROF: Latch: +; CHECK-NOTPROF-NEXT: [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576 +; CHECK-NOTPROF-NEXT: br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof [[PROF2]] +; CHECK-NOTPROF: exitLatch: +; CHECK-NOTPROF-NEXT: ret i64 1 +; CHECK-NOTPROF: exit: +; CHECK-NOTPROF-NEXT: [[RESULT_IN3_LCSSA:%.*]] = phi ptr [ [[RESULT_IN3]], [[HEADER]] ] +; CHECK-NOTPROF-NEXT: [[RESULT_LE:%.*]] = load i64, ptr [[RESULT_IN3_LCSSA]], align 8 +; CHECK-NOTPROF-NEXT: ret i64 [[RESULT_LE]] +; +entry: + %length.ext = zext i32 %length to i64 + %n.pre = load i64, ptr %n_addr, align 4 + br label %Header + +Header: ; preds = %entry, %Latch + %result.in3 = phi ptr [ %arg2, %entry ], [ %arg, %Latch ] + %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ] + %within.bounds = icmp ult i64 %j2, %length.ext + call void (i1, ...) 
@llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + %innercmp = icmp eq i64 %j2, %n.pre + %j.next = add nuw nsw i64 %j2, 1 + br i1 %innercmp, label %Latch, label %exit, !prof !1 + +Latch: ; preds = %Header + %speculate_trip_count = icmp ult i64 %j.next, 1048576 + br i1 %speculate_trip_count, label %Header, label %exitLatch, !prof !2 + +exitLatch: ; preds = %Latch + ret i64 1 + +exit: ; preds = %Header + %result.in3.lcssa = phi ptr [ %result.in3, %Header ] + %result.le = load i64, ptr %result.in3.lcssa, align 8 + ret i64 %result.le +} +!1 = !{!"branch_weights", i32 0, i32 0} + +; No profile on br in Header +define i64 @predicate_eq_none(ptr nocapture readonly %arg, i32 %length, ptr nocapture readonly %arg2, ptr nocapture readonly %n_addr, i64 %i) !prof !21 { +; CHECK-PROF-LABEL: define i64 @predicate_eq_none( +; CHECK-PROF-SAME: ptr nocapture readonly [[ARG:%.*]], i32 [[LENGTH:%.*]], ptr nocapture readonly [[ARG2:%.*]], ptr nocapture readonly [[N_ADDR:%.*]], i64 [[I:%.*]]) !prof [[PROF0]] { +; CHECK-PROF-NEXT: entry: +; CHECK-PROF-NEXT: [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-PROF-NEXT: [[N_PRE:%.*]] = load i64, ptr [[N_ADDR]], align 4 +; CHECK-PROF-NEXT: [[TMP0:%.*]] = icmp ule i64 1048576, [[LENGTH_EXT]] +; CHECK-PROF-NEXT: [[TMP1:%.*]] = icmp ult i64 0, [[LENGTH_EXT]] +; CHECK-PROF-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]] +; CHECK-PROF-NEXT: [[TMP3:%.*]] = freeze i1 [[TMP2]] +; CHECK-PROF-NEXT: br label [[HEADER:%.*]] +; CHECK-PROF: Header: +; CHECK-PROF-NEXT: [[RESULT_IN3:%.*]] = phi ptr [ [[ARG2]], [[ENTRY:%.*]] ], [ [[ARG]], [[LATCH:%.*]] ] +; CHECK-PROF-NEXT: [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ] +; CHECK-PROF-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]] +; CHECK-PROF-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ] +; CHECK-PROF-NEXT: call void @llvm.assume(i1 [[WITHIN_BOUNDS]]) +; CHECK-PROF-NEXT: [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]] +; CHECK-PROF-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J2]], 1 +; CHECK-PROF-NEXT: br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]] +; CHECK-PROF: Latch: +; CHECK-PROF-NEXT: [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576 +; CHECK-PROF-NEXT: br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof [[PROF2]] +; CHECK-PROF: exitLatch: +; CHECK-PROF-NEXT: ret i64 1 +; CHECK-PROF: exit: +; CHECK-PROF-NEXT: [[RESULT_IN3_LCSSA:%.*]] = phi ptr [ [[RESULT_IN3]], [[HEADER]] ] +; CHECK-PROF-NEXT: [[RESULT_LE:%.*]] = load i64, ptr [[RESULT_IN3_LCSSA]], align 8 +; CHECK-PROF-NEXT: ret i64 [[RESULT_LE]] +; +; CHECK-NOTPROF-LABEL: define i64 @predicate_eq_none( +; CHECK-NOTPROF-SAME: ptr nocapture readonly [[ARG:%.*]], i32 [[LENGTH:%.*]], ptr nocapture readonly [[ARG2:%.*]], ptr nocapture readonly [[N_ADDR:%.*]], i64 [[I:%.*]]) !prof [[PROF0]] { +; CHECK-NOTPROF-NEXT: entry: +; CHECK-NOTPROF-NEXT: [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-NOTPROF-NEXT: [[N_PRE:%.*]] = load i64, ptr [[N_ADDR]], align 4 +; CHECK-NOTPROF-NEXT: br label [[HEADER:%.*]] +; CHECK-NOTPROF: Header: +; CHECK-NOTPROF-NEXT: [[RESULT_IN3:%.*]] = phi ptr [ [[ARG2]], [[ENTRY:%.*]] ], [ [[ARG]], [[LATCH:%.*]] ] +; CHECK-NOTPROF-NEXT: [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ] +; CHECK-NOTPROF-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]] +; CHECK-NOTPROF-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ] +; CHECK-NOTPROF-NEXT: [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]] +; CHECK-NOTPROF-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J2]], 1 +; CHECK-NOTPROF-NEXT: br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]] +; CHECK-NOTPROF: Latch: +; CHECK-NOTPROF-NEXT: [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576 +; CHECK-NOTPROF-NEXT: br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof [[PROF2]] +; CHECK-NOTPROF: exitLatch: +; CHECK-NOTPROF-NEXT: ret i64 1 +; CHECK-NOTPROF: exit: +; CHECK-NOTPROF-NEXT: [[RESULT_IN3_LCSSA:%.*]] = phi ptr [ [[RESULT_IN3]], [[HEADER]] ] +; CHECK-NOTPROF-NEXT: [[RESULT_LE:%.*]] = load i64, ptr [[RESULT_IN3_LCSSA]], align 8 +; CHECK-NOTPROF-NEXT: ret i64 [[RESULT_LE]] +; +entry: + %length.ext = zext i32 %length to i64 + %n.pre = load i64, ptr %n_addr, align 4 + br label %Header + +Header: ; preds = %entry, %Latch + %result.in3 = phi ptr [ %arg2, %entry ], [ %arg, %Latch ] + %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ] + %within.bounds = icmp ult i64 %j2, %length.ext + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + %innercmp = icmp eq i64 %j2, %n.pre + %j.next = add nuw nsw i64 %j2, 1 + br i1 %innercmp, label %Latch, label %exit + +Latch: ; preds = %Header + %speculate_trip_count = icmp ult i64 %j.next, 1048576 + br i1 %speculate_trip_count, label %Header, label %exitLatch, !prof !2 + +exitLatch: ; preds = %Latch + ret i64 1 + +exit: ; preds = %Header + %result.in3.lcssa = phi ptr [ %result.in3, %Header ] + %result.le = load i64, ptr %result.in3.lcssa, align 8 + ret i64 %result.le +} + +!2 = !{!"branch_weights", i32 3, i32 1} +!21 = !{!"function_entry_count", i64 20000} + +declare i64 @llvm.experimental.deoptimize.i64(...) +declare void @llvm.experimental.guard(i1, ...)