83 changes: 78 additions & 5 deletions llvm/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S < %s -instcombine -inline -jump-threading -loop-unroll -unroll-count=4 | FileCheck %s
;
; This is a test case that required a number of setup passes because
Expand All @@ -12,12 +13,72 @@ declare i32 @getval() nounwind
; Check that the loop exit merges values from all the iterations. This
; could be a tad fragile, but it's a good test.
;
; CHECK-LABEL: @foo(
; CHECK: return:
; CHECK: %retval.0 = phi i32 [ %tmp7.i, %land.lhs.true ], [ 0, %do.cond ], [ %tmp7.i.1, %land.lhs.true.1 ], [ 0, %do.cond.1 ], [ %tmp7.i.2, %land.lhs.true.2 ], [ 0, %do.cond.2 ], [ %tmp7.i.3, %land.lhs.true.3 ], [ 0, %do.cond.3 ]
; CHECK-NOT: @bar(
; CHECK: bar.exit.3
define i32 @foo() uwtable ssp align 2 {
;
; CHECK-LABEL: @foo(
; CHECK-NEXT: if.end:
; CHECK-NEXT: [[CALL2:%.*]] = call i32 @getval()
; CHECK-NEXT: br label [[LAND_LHS_TRUE_I:%.*]]
; CHECK: land.lhs.true.i:
; CHECK-NEXT: [[CMP4_I:%.*]] = call zeroext i1 @check() #[[ATTR0:[0-9]+]]
; CHECK-NEXT: br i1 [[CMP4_I]], label [[BAR_EXIT:%.*]], label [[DO_COND:%.*]]
; CHECK: bar.exit:
; CHECK-NEXT: [[TMP7_I:%.*]] = call i32 @getval() #[[ATTR0]]
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[TMP7_I]], 0
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[DO_COND]], label [[LAND_LHS_TRUE:%.*]]
; CHECK: land.lhs.true:
; CHECK-NEXT: [[CALL10:%.*]] = call i32 @getval()
; CHECK-NEXT: [[CMP11:%.*]] = icmp eq i32 [[CALL10]], 0
; CHECK-NEXT: br i1 [[CMP11]], label [[RETURN:%.*]], label [[DO_COND]]
; CHECK: do.cond:
; CHECK-NEXT: [[CMP18:%.*]] = icmp sgt i32 [[CALL2]], -1
; CHECK-NEXT: br i1 [[CMP18]], label [[LAND_LHS_TRUE_I_1:%.*]], label [[RETURN]]
; CHECK: return:
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[TMP7_I]], [[LAND_LHS_TRUE]] ], [ 0, [[DO_COND]] ], [ [[TMP7_I_1:%.*]], [[LAND_LHS_TRUE_1:%.*]] ], [ 0, [[DO_COND_1:%.*]] ], [ [[TMP7_I_2:%.*]], [[LAND_LHS_TRUE_2:%.*]] ], [ 0, [[DO_COND_2:%.*]] ], [ [[TMP7_I_3:%.*]], [[LAND_LHS_TRUE_3:%.*]] ], [ 0, [[DO_COND_3:%.*]] ]
; CHECK-NEXT: ret i32 [[RETVAL_0]]
; CHECK: land.lhs.true.i.1:
; CHECK-NEXT: [[CMP4_I_1:%.*]] = call zeroext i1 @check() #[[ATTR0]]
; CHECK-NEXT: br i1 [[CMP4_I_1]], label [[BAR_EXIT_1:%.*]], label [[DO_COND_1]]
; CHECK: bar.exit.1:
; CHECK-NEXT: [[TMP7_I_1]] = call i32 @getval() #[[ATTR0]]
; CHECK-NEXT: [[CMP_NOT_1:%.*]] = icmp eq i32 [[TMP7_I_1]], 0
; CHECK-NEXT: br i1 [[CMP_NOT_1]], label [[DO_COND_1]], label [[LAND_LHS_TRUE_1]]
; CHECK: land.lhs.true.1:
; CHECK-NEXT: [[CALL10_1:%.*]] = call i32 @getval()
; CHECK-NEXT: [[CMP11_1:%.*]] = icmp eq i32 [[CALL10_1]], 0
; CHECK-NEXT: br i1 [[CMP11_1]], label [[RETURN]], label [[DO_COND_1]]
; CHECK: do.cond.1:
; CHECK-NEXT: [[CMP18_1:%.*]] = icmp sgt i32 [[CALL2]], -1
; CHECK-NEXT: br i1 [[CMP18_1]], label [[LAND_LHS_TRUE_I_2:%.*]], label [[RETURN]]
; CHECK: land.lhs.true.i.2:
; CHECK-NEXT: [[CMP4_I_2:%.*]] = call zeroext i1 @check() #[[ATTR0]]
; CHECK-NEXT: br i1 [[CMP4_I_2]], label [[BAR_EXIT_2:%.*]], label [[DO_COND_2]]
; CHECK: bar.exit.2:
; CHECK-NEXT: [[TMP7_I_2]] = call i32 @getval() #[[ATTR0]]
; CHECK-NEXT: [[CMP_NOT_2:%.*]] = icmp eq i32 [[TMP7_I_2]], 0
; CHECK-NEXT: br i1 [[CMP_NOT_2]], label [[DO_COND_2]], label [[LAND_LHS_TRUE_2]]
; CHECK: land.lhs.true.2:
; CHECK-NEXT: [[CALL10_2:%.*]] = call i32 @getval()
; CHECK-NEXT: [[CMP11_2:%.*]] = icmp eq i32 [[CALL10_2]], 0
; CHECK-NEXT: br i1 [[CMP11_2]], label [[RETURN]], label [[DO_COND_2]]
; CHECK: do.cond.2:
; CHECK-NEXT: [[CMP18_2:%.*]] = icmp sgt i32 [[CALL2]], -1
; CHECK-NEXT: br i1 [[CMP18_2]], label [[LAND_LHS_TRUE_I_3:%.*]], label [[RETURN]]
; CHECK: land.lhs.true.i.3:
; CHECK-NEXT: [[CMP4_I_3:%.*]] = call zeroext i1 @check() #[[ATTR0]]
; CHECK-NEXT: br i1 [[CMP4_I_3]], label [[BAR_EXIT_3:%.*]], label [[DO_COND_3]]
; CHECK: bar.exit.3:
; CHECK-NEXT: [[TMP7_I_3]] = call i32 @getval() #[[ATTR0]]
; CHECK-NEXT: [[CMP_NOT_3:%.*]] = icmp eq i32 [[TMP7_I_3]], 0
; CHECK-NEXT: br i1 [[CMP_NOT_3]], label [[DO_COND_3]], label [[LAND_LHS_TRUE_3]]
; CHECK: land.lhs.true.3:
; CHECK-NEXT: [[CALL10_3:%.*]] = call i32 @getval()
; CHECK-NEXT: [[CMP11_3:%.*]] = icmp eq i32 [[CALL10_3]], 0
; CHECK-NEXT: br i1 [[CMP11_3]], label [[RETURN]], label [[DO_COND_3]]
; CHECK: do.cond.3:
; CHECK-NEXT: [[CMP18_3:%.*]] = icmp sgt i32 [[CALL2]], -1
; CHECK-NEXT: br i1 [[CMP18_3]], label [[LAND_LHS_TRUE_I]], label [[RETURN]], !llvm.loop [[LOOP0:![0-9]+]]
;
entry:
br i1 undef, label %return, label %if.end

Expand Down Expand Up @@ -45,6 +106,18 @@ return: ; preds = %do.cond, %land.lhs.
}

define linkonce_odr i32 @bar() nounwind uwtable ssp align 2 {
;
; CHECK-LABEL: @bar(
; CHECK-NEXT: land.lhs.true:
; CHECK-NEXT: [[CMP4:%.*]] = call zeroext i1 @check()
; CHECK-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]]
; CHECK: cond.true:
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @getval()
; CHECK-NEXT: br label [[COND_END]]
; CHECK: cond.end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ 0, [[LAND_LHS_TRUE:%.*]] ]
; CHECK-NEXT: ret i32 [[COND]]
;
entry:
br i1 undef, label %land.lhs.true, label %cond.end

Expand Down
672 changes: 583 additions & 89 deletions llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll

Large diffs are not rendered by default.

199 changes: 145 additions & 54 deletions llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
Original file line number Diff line number Diff line change
@@ -1,17 +1,27 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -loop-unroll < %s | FileCheck %s
; RUN: opt -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' < %s | FileCheck %s

; Unroll twice, with first loop exit kept
; CHECK-LABEL: @s32_max1
; CHECK: do.body:
; CHECK: store
; CHECK: br i1 %cmp, label %do.body.1, label %do.end
; CHECK: do.end:
; CHECK: ret void
; CHECK: do.body.1:
; CHECK: store
; CHECK: br label %do.end
define void @s32_max1(i32 %n, i32* %p) {
;
; CHECK-LABEL: @s32_max1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[N:%.*]], 1
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[N]]
; CHECK-NEXT: store i32 [[N]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC:%.*]] = add i32 [[N]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[N]], [[ADD]]
; CHECK-NEXT: br i1 [[CMP]], label [[DO_BODY_1:%.*]], label [[DO_END:%.*]]
; CHECK: do.end:
; CHECK-NEXT: ret void
; CHECK: do.body.1:
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC]]
; CHECK-NEXT: store i32 [[INC]], i32* [[ARRAYIDX_1]], align 4
; CHECK-NEXT: br label [[DO_END]]
;
entry:
%add = add i32 %n, 1
br label %do.body
Expand All @@ -29,17 +39,28 @@ do.end:
}

; Unroll thrice, with first loop exit kept
; CHECK-LABEL: @s32_max2
; CHECK: do.body:
; CHECK: store
; CHECK: br i1 %cmp, label %do.body.1, label %do.end
; CHECK: do.end:
; CHECK: ret void
; CHECK: do.body.1:
; CHECK: store
; CHECK: store
; CHECK: br label %do.end
define void @s32_max2(i32 %n, i32* %p) {
;
; CHECK-LABEL: @s32_max2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[N:%.*]], 2
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[N]]
; CHECK-NEXT: store i32 [[N]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC:%.*]] = add i32 [[N]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[N]], [[ADD]]
; CHECK-NEXT: br i1 [[CMP]], label [[DO_BODY_1:%.*]], label [[DO_END:%.*]]
; CHECK: do.end:
; CHECK-NEXT: ret void
; CHECK: do.body.1:
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC]]
; CHECK-NEXT: store i32 [[INC]], i32* [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[INC_1:%.*]] = add i32 [[INC]], 1
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC_1]]
; CHECK-NEXT: store i32 [[INC_1]], i32* [[ARRAYIDX_2]], align 4
; CHECK-NEXT: br label [[DO_END]]
;
entry:
%add = add i32 %n, 2
br label %do.body
Expand All @@ -57,11 +78,22 @@ do.end:
}

; Should not be unrolled
; CHECK-LABEL: @s32_maxx
; CHECK: do.body:
; CHECK: do.end:
; CHECK-NOT: do.body.1:
define void @s32_maxx(i32 %n, i32 %x, i32* %p) {
;
; CHECK-LABEL: @s32_maxx(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], [[N:%.*]]
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[DO_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[I_0]]
; CHECK-NEXT: store i32 [[I_0]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC]] = add i32 [[I_0]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[ADD]]
; CHECK-NEXT: br i1 [[CMP]], label [[DO_BODY]], label [[DO_END:%.*]]
; CHECK: do.end:
; CHECK-NEXT: ret void
;
entry:
%add = add i32 %x, %n
br label %do.body
Expand All @@ -79,11 +111,25 @@ do.end:
}

; Should not be unrolled
; CHECK-LABEL: @s32_max2_unpredictable_exit
; CHECK: do.body:
; CHECK: do.end:
; CHECK-NOT: do.body.1:
define void @s32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) {
;
; CHECK-LABEL: @s32_max2_unpredictable_exit(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[N:%.*]], 2
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[IF_END:%.*]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_0]], [[X:%.*]]
; CHECK-NEXT: br i1 [[CMP]], label [[DO_END:%.*]], label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[I_0]]
; CHECK-NEXT: store i32 [[I_0]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC]] = add i32 [[I_0]], 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_0]], [[ADD]]
; CHECK-NEXT: br i1 [[CMP1]], label [[DO_BODY]], label [[DO_END]]
; CHECK: do.end:
; CHECK-NEXT: ret void
;
entry:
%add = add i32 %n, 2
br label %do.body
Expand All @@ -105,16 +151,25 @@ do.end:
}

; Unroll twice, with first loop exit kept
; CHECK-LABEL: @u32_max1
; CHECK: do.body:
; CHECK: store
; CHECK: br i1 %cmp, label %do.body.1, label %do.end
; CHECK: do.end:
; CHECK: ret void
; CHECK: do.body.1:
; CHECK: store
; CHECK: br label %do.end
define void @u32_max1(i32 %n, i32* %p) {
;
; CHECK-LABEL: @u32_max1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[N:%.*]], 1
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[N]]
; CHECK-NEXT: store i32 [[N]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC:%.*]] = add i32 [[N]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[N]], [[ADD]]
; CHECK-NEXT: br i1 [[CMP]], label [[DO_BODY_1:%.*]], label [[DO_END:%.*]]
; CHECK: do.end:
; CHECK-NEXT: ret void
; CHECK: do.body.1:
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC]]
; CHECK-NEXT: store i32 [[INC]], i32* [[ARRAYIDX_1]], align 4
; CHECK-NEXT: br label [[DO_END]]
;
entry:
%add = add i32 %n, 1
br label %do.body
Expand All @@ -132,17 +187,28 @@ do.end:
}

; Unroll thrice, with first loop exit kept
; CHECK-LABEL: @u32_max2
; CHECK: do.body:
; CHECK: store
; CHECK: br i1 %cmp, label %do.body.1, label %do.end
; CHECK: do.end:
; CHECK: ret void
; CHECK: do.body.1:
; CHECK: store
; CHECK: store
; CHECK: br label %do.end
define void @u32_max2(i32 %n, i32* %p) {
;
; CHECK-LABEL: @u32_max2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[N:%.*]], 2
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[N]]
; CHECK-NEXT: store i32 [[N]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC:%.*]] = add i32 [[N]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[N]], [[ADD]]
; CHECK-NEXT: br i1 [[CMP]], label [[DO_BODY_1:%.*]], label [[DO_END:%.*]]
; CHECK: do.end:
; CHECK-NEXT: ret void
; CHECK: do.body.1:
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC]]
; CHECK-NEXT: store i32 [[INC]], i32* [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[INC_1:%.*]] = add i32 [[INC]], 1
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC_1]]
; CHECK-NEXT: store i32 [[INC_1]], i32* [[ARRAYIDX_2]], align 4
; CHECK-NEXT: br label [[DO_END]]
;
entry:
%add = add i32 %n, 2
br label %do.body
Expand All @@ -160,11 +226,22 @@ do.end:
}

; Should not be unrolled
; CHECK-LABEL: @u32_maxx
; CHECK: do.body:
; CHECK: do.end:
; CHECK-NOT: do.body.1:
define void @u32_maxx(i32 %n, i32 %x, i32* %p) {
;
; CHECK-LABEL: @u32_maxx(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], [[N:%.*]]
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[DO_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[I_0]]
; CHECK-NEXT: store i32 [[I_0]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC]] = add i32 [[I_0]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], [[ADD]]
; CHECK-NEXT: br i1 [[CMP]], label [[DO_BODY]], label [[DO_END:%.*]]
; CHECK: do.end:
; CHECK-NEXT: ret void
;
entry:
%add = add i32 %x, %n
br label %do.body
Expand All @@ -182,11 +259,25 @@ do.end:
}

; Should not be unrolled
; CHECK-LABEL: @u32_max2_unpredictable_exit
; CHECK: do.body:
; CHECK: do.end:
; CHECK-NOT: do.body.1:
define void @u32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) {
;
; CHECK-LABEL: @u32_max2_unpredictable_exit(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[N:%.*]], 2
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[IF_END:%.*]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_0]], [[X:%.*]]
; CHECK-NEXT: br i1 [[CMP]], label [[DO_END:%.*]], label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[I_0]]
; CHECK-NEXT: store i32 [[I_0]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC]] = add i32 [[I_0]], 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[I_0]], [[ADD]]
; CHECK-NEXT: br i1 [[CMP1]], label [[DO_BODY]], label [[DO_END]]
; CHECK: do.end:
; CHECK-NEXT: ret void
;
entry:
%add = add i32 %n, 2
br label %do.body
Expand Down
Original file line number Diff line number Diff line change
@@ -1,48 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-unroll -unroll-allow-partial -S %s -verify-loop-info -verify-dom-info -verify-loop-lcssa | FileCheck %s

@table = internal unnamed_addr global [344 x i32] zeroinitializer, align 16

define i32 @test_partial_unroll_with_breakout_at_iter0() {
; CHECK-LABEL: define i32 @test_partial_unroll_with_breakout_at_iter0() {
; CHECK-LABEL: entry:
; CHECK-NEXT: br label %for.header

; CHECK-LABEL: for.header: ; preds = %for.latch.3, %entry
; CHECK-NEXT: %red = phi i32 [ 0, %entry ], [ %red.next.3, %for.latch.3 ]
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next.3, %for.latch.3 ]
; CHECK-NEXT: %red.next = add nuw nsw i32 10, %red
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 2
; CHECK-NEXT: %ptr = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next
; CHECK-NEXT: store i32 %red.next, i32* %ptr, align 4
; CHECK-NEXT: br label %for.latch

; CHECK-LABEL: for.latch: ; preds = %for.header
; CHECK-NEXT: %red.next.1 = add nuw nsw i32 10, %red.next
; CHECK-NEXT: %iv.next.1 = add nuw nsw i64 %iv.next, 2
; CHECK-NEXT: %ptr.1 = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next.1
; CHECK-NEXT: store i32 %red.next.1, i32* %ptr.1, align 4
; CHECK-NEXT: br label %for.latch.1

; CHECK-LABEL: exit: ; preds = %for.latch.2
;
;
; CHECK-LABEL: @test_partial_unroll_with_breakout_at_iter0(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_HEADER:%.*]]
; CHECK: for.header:
; CHECK-NEXT: [[RED:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RED_NEXT_3:%.*]], [[FOR_LATCH_3:%.*]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT_3:%.*]], [[FOR_LATCH_3]] ]
; CHECK-NEXT: [[RED_NEXT:%.*]] = add nuw nsw i32 10, [[RED]]
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 [[IV_NEXT]]
; CHECK-NEXT: store i32 [[RED_NEXT]], i32* [[PTR]], align 4
; CHECK-NEXT: br label [[FOR_LATCH:%.*]]
; CHECK: for.latch:
; CHECK-NEXT: [[RED_NEXT_1:%.*]] = add nuw nsw i32 10, [[RED_NEXT]]
; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV_NEXT]], 2
; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 [[IV_NEXT_1]]
; CHECK-NEXT: store i32 [[RED_NEXT_1]], i32* [[PTR_1]], align 4
; CHECK-NEXT: br label [[FOR_LATCH_1:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret i32 0

; CHECK-LABEL: for.latch.1: ; preds = %for.latch
; CHECK-NEXT: %red.next.2 = add nuw nsw i32 10, %red.next.1
; CHECK-NEXT: %iv.next.2 = add nuw nsw i64 %iv.next.1, 2
; CHECK-NEXT: %ptr.2 = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next.2
; CHECK-NEXT: store i32 %red.next.2, i32* %ptr.2, align 4
; CHECK-NEXT: br label %for.latch.2

; CHECK-LABEL: for.latch.2: ; preds = %for.latch.1
; CHECK-NEXT: %red.next.3 = add nuw nsw i32 10, %red.next.2
; CHECK-NEXT: %iv.next.3 = add nuw nsw i64 %iv.next.2, 2
; CHECK-NEXT: %ptr.3 = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next.3
; CHECK-NEXT: store i32 %red.next.3, i32* %ptr.3, align 4
; CHECK-NEXT: %exitcond.1.i.3 = icmp eq i64 %iv.next.3, 344
; CHECK-NEXT: br i1 %exitcond.1.i.3, label %exit, label %for.latch.3

; CHECK-LABEL: for.latch.3: ; preds = %for.latch.2
; CHECK-NEXT: br label %for.header
; CHECK: for.latch.1:
; CHECK-NEXT: [[RED_NEXT_2:%.*]] = add nuw nsw i32 10, [[RED_NEXT_1]]
; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV_NEXT_1]], 2
; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 [[IV_NEXT_2]]
; CHECK-NEXT: store i32 [[RED_NEXT_2]], i32* [[PTR_2]], align 4
; CHECK-NEXT: br label [[FOR_LATCH_2:%.*]]
; CHECK: for.latch.2:
; CHECK-NEXT: [[RED_NEXT_3]] = add nuw nsw i32 10, [[RED_NEXT_2]]
; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV_NEXT_2]], 2
; CHECK-NEXT: [[PTR_3:%.*]] = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 [[IV_NEXT_3]]
; CHECK-NEXT: store i32 [[RED_NEXT_3]], i32* [[PTR_3]], align 4
; CHECK-NEXT: [[EXITCOND_1_I_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 344
; CHECK-NEXT: br i1 [[EXITCOND_1_I_3]], label [[EXIT:%.*]], label [[FOR_LATCH_3]]
; CHECK: for.latch.3:
; CHECK-NEXT: br label [[FOR_HEADER]]
;
entry:
br label %for.header
Expand Down
238 changes: 207 additions & 31 deletions llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
Original file line number Diff line number Diff line change
@@ -1,31 +1,166 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -unroll-runtime-other-exit-predictable=false -loop-unroll -unroll-runtime=true -verify-dom-info -verify-loop-info -instcombine -S | FileCheck %s
; RUN: opt < %s -unroll-runtime-other-exit-predictable=false -loop-unroll -unroll-runtime=true -verify-dom-info -unroll-runtime-multi-exit=false -verify-loop-info -S | FileCheck %s -check-prefix=NOUNROLL

; this tests when unrolling multiple exit loop occurs by default (i.e. without specifying -unroll-runtime-multi-exit)

; the second exit block is a deopt block. The loop has one exiting block other than the latch.
define i32 @test1(i32* nocapture %a, i64 %n) {
; CHECK-LABEL: test1(
; CHECK-LABEL: header.epil:
; CHECK-NEXT: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
; CHECK-LABEL: otherexit.loopexit:
; CHECK-NEXT: %sum.02.lcssa.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %for.exiting_block.2 ], [ %add.2, %for.exiting_block.3 ], [ %add.3, %for.exiting_block.4 ], [ %add.4, %for.exiting_block.5 ], [ %add.5, %for.exiting_block.6 ],
; CHECK-NEXT: br label %otherexit
; CHECK-LABEL: otherexit.loopexit3:
; CHECK-NEXT: br label %otherexit
; CHECK-LABEL: otherexit:
; CHECK-NEXT: %sum.02.lcssa = phi i32 [ %sum.02.lcssa.ph, %otherexit.loopexit ], [ %sum.02.epil, %otherexit.loopexit3 ]
; CHECK-NEXT: %rval = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %sum.02.lcssa) ]
; CHECK-NEXT: ret i32 %rval
; CHECK-LABEL: latch.7:
; CHECK: add i64 %indvars.iv, 8

; NOUNROLL: test1(
; NOUNROLL-NOT: .epil
; NOUNROLL-NOT: .prol
; NOUNROLL: otherexit:
; NOUNROLL-NEXT: %sum.02.lcssa = phi i32 [ %sum.02, %for.exiting_block ]
; NOUNROLL-NEXT: %rval = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %sum.02.lcssa) ]
;
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
; CHECK-NEXT: br i1 [[TMP1]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
; CHECK: entry.new:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[N]], -8
; CHECK-NEXT: br label [[HEADER:%.*]]
; CHECK: header:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ]
; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ]
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[ENTRY_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[LATCH_7]] ]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
; CHECK: for.exiting_block:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42
; CHECK-NEXT: br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]]
; CHECK: latch:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[SUM_02]]
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]]
; CHECK: latchexit.unr-lcssa.loopexit:
; CHECK-NEXT: br label [[LATCHEXIT_UNR_LCSSA]]
; CHECK: latchexit.unr-lcssa:
; CHECK-NEXT: [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]] ]
; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[LATCHEXIT:%.*]], label [[HEADER_EPIL_PREHEADER:%.*]]
; CHECK: header.epil.preheader:
; CHECK-NEXT: br label [[HEADER_EPIL:%.*]]
; CHECK: header.epil:
; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
; CHECK-NEXT: [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[LATCH_EPIL]] ], [ [[XTRAITER]], [[HEADER_EPIL_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
; CHECK: for.exiting_block.epil:
; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42
; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]]
; CHECK: latch.epil:
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_EPIL]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4
; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP3]], [[SUM_02_EPIL]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
; CHECK-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1
; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0
; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], label [[HEADER_EPIL]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: latchexit.epilog-lcssa:
; CHECK-NEXT: br label [[LATCHEXIT]]
; CHECK: latchexit:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL]], [[LATCHEXIT_EPILOG_LCSSA]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
; CHECK: otherexit.loopexit:
; CHECK-NEXT: [[SUM_02_LCSSA_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1:%.*]], [[FOR_EXITING_BLOCK_2:%.*]] ], [ [[ADD_2:%.*]], [[FOR_EXITING_BLOCK_3:%.*]] ], [ [[ADD_3:%.*]], [[FOR_EXITING_BLOCK_4:%.*]] ], [ [[ADD_4:%.*]], [[FOR_EXITING_BLOCK_5:%.*]] ], [ [[ADD_5:%.*]], [[FOR_EXITING_BLOCK_6:%.*]] ], [ [[ADD_6:%.*]], [[FOR_EXITING_BLOCK_7:%.*]] ]
; CHECK-NEXT: br label [[OTHEREXIT:%.*]]
; CHECK: otherexit.loopexit3:
; CHECK-NEXT: br label [[OTHEREXIT]]
; CHECK: otherexit:
; CHECK-NEXT: [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02_LCSSA_PH]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_EPIL]], [[OTHEREXIT_LOOPEXIT3]] ]
; CHECK-NEXT: [[RVAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
; CHECK-NEXT: ret i32 [[RVAL]]
; CHECK: for.exiting_block.1:
; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42
; CHECK-NEXT: br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
; CHECK: latch.1:
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[ADD_1]] = add nsw i32 [[TMP4]], [[ADD]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or i64 [[INDVARS_IV]], 2
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2]]
; CHECK: for.exiting_block.2:
; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42
; CHECK-NEXT: br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
; CHECK: latch.2:
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
; CHECK-NEXT: [[ADD_2]] = add nsw i32 [[TMP5]], [[ADD_1]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or i64 [[INDVARS_IV]], 3
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3]]
; CHECK: for.exiting_block.3:
; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
; CHECK-NEXT: br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
; CHECK: latch.3:
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
; CHECK-NEXT: [[ADD_3]] = add nsw i32 [[TMP6]], [[ADD_2]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = or i64 [[INDVARS_IV]], 4
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4]]
; CHECK: for.exiting_block.4:
; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42
; CHECK-NEXT: br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
; CHECK: latch.4:
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
; CHECK-NEXT: [[ADD_4]] = add nsw i32 [[TMP7]], [[ADD_3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = or i64 [[INDVARS_IV]], 5
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_5]]
; CHECK: for.exiting_block.5:
; CHECK-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42
; CHECK-NEXT: br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
; CHECK: latch.5:
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
; CHECK-NEXT: [[ADD_5]] = add nsw i32 [[TMP8]], [[ADD_4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = or i64 [[INDVARS_IV]], 6
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_6]]
; CHECK: for.exiting_block.6:
; CHECK-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N]], 42
; CHECK-NEXT: br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
; CHECK: latch.6:
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]]
; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
; CHECK-NEXT: [[ADD_6]] = add nsw i32 [[TMP9]], [[ADD_5]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = or i64 [[INDVARS_IV]], 7
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_7]]
; CHECK: for.exiting_block.7:
; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42
; CHECK-NEXT: br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
; CHECK: latch.7:
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_6]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
; CHECK-NEXT: [[NITER_NSUB_7]] = add i64 [[NITER]], -8
; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0
; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]], label [[HEADER]]
;
; NOUNROLL-LABEL: @test1(
; NOUNROLL-NEXT: entry:
; NOUNROLL-NEXT: br label [[HEADER:%.*]]
; NOUNROLL: header:
; NOUNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; NOUNROLL-NEXT: [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
; NOUNROLL-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
; NOUNROLL: for.exiting_block:
; NOUNROLL-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
; NOUNROLL-NEXT: br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]]
; NOUNROLL: latch:
; NOUNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
; NOUNROLL-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; NOUNROLL-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
; NOUNROLL-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; NOUNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; NOUNROLL-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
; NOUNROLL: latchexit:
; NOUNROLL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
; NOUNROLL-NEXT: ret i32 [[SUM_0_LCSSA]]
; NOUNROLL: otherexit:
; NOUNROLL-NEXT: [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ]
; NOUNROLL-NEXT: [[RVAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
; NOUNROLL-NEXT: ret i32 [[RVAL]]
;
entry:
br label %header

Expand All @@ -35,8 +170,8 @@ header:
br label %for.exiting_block

for.exiting_block:
%cmp = icmp eq i64 %n, 42
br i1 %cmp, label %otherexit, label %latch
%cmp = icmp eq i64 %n, 42
br i1 %cmp, label %otherexit, label %latch

latch:
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
Expand All @@ -57,12 +192,53 @@ otherexit:

; the exit block is not a deopt block.
define i32 @test2(i32* nocapture %a, i64 %n) {
; CHECK-LABEL: test2(
; CHECK-NOT: .epil
; CHECK-NOT: .prol
; CHECK-LABEL: otherexit:
; CHECK-NEXT: ret i32 %sum.02

;
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[HEADER:%.*]]
; CHECK: header:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
; CHECK: for.exiting_block:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
; CHECK-NEXT: br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
; CHECK: latchexit:
; CHECK-NEXT: ret i32 [[ADD]]
; CHECK: otherexit:
; CHECK-NEXT: ret i32 [[SUM_02]]
;
; NOUNROLL-LABEL: @test2(
; NOUNROLL-NEXT: entry:
; NOUNROLL-NEXT: br label [[HEADER:%.*]]
; NOUNROLL: header:
; NOUNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; NOUNROLL-NEXT: [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
; NOUNROLL-NEXT: br label [[FOR_EXITING_BLOCK:%.*]]
; NOUNROLL: for.exiting_block:
; NOUNROLL-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
; NOUNROLL-NEXT: br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]]
; NOUNROLL: latch:
; NOUNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
; NOUNROLL-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; NOUNROLL-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
; NOUNROLL-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; NOUNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; NOUNROLL-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
; NOUNROLL: latchexit:
; NOUNROLL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
; NOUNROLL-NEXT: ret i32 [[SUM_0_LCSSA]]
; NOUNROLL: otherexit:
; NOUNROLL-NEXT: [[RVAL:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ]
; NOUNROLL-NEXT: ret i32 [[RVAL]]
;
entry:
br label %header

Expand All @@ -72,8 +248,8 @@ header:
br label %for.exiting_block

for.exiting_block:
%cmp = icmp eq i64 %n, 42
br i1 %cmp, label %otherexit, label %latch
%cmp = icmp eq i64 %n, 42
br i1 %cmp, label %otherexit, label %latch

latch:
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
Expand Down
134 changes: 98 additions & 36 deletions llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,104 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=4 -unroll-remainder -instcombine | FileCheck %s

; CHECK-LABEL: unroll
define i32 @unroll(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
;
; CHECK-LABEL: @unroll(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP9:%.*]] = icmp eq i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]]
; CHECK: for.body.lr.ph:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 3
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 3
; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]]
; CHECK: for.body.lr.ph.new:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit.unr-lcssa.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]]
; CHECK: for.cond.cleanup.loopexit.unr-lcssa:
; CHECK-NEXT: [[ADD_LCSSA_PH:%.*]] = phi i32 [ undef, [[FOR_BODY_LR_PH]] ], [ [[ADD_3:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]] ]
; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[C_010_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[ADD_3]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY_EPIL_PREHEADER:%.*]]
; CHECK: for.body.epil.preheader:
; CHECK-NEXT: br label [[FOR_BODY_EPIL:%.*]]
; CHECK: for.body.epil:
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV_UNR]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4
; CHECK-NEXT: [[ARRAYIDX2_EPIL:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV_UNR]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL]], align 4
; CHECK-NEXT: [[MUL_EPIL:%.*]] = mul nsw i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[ADD_EPIL:%.*]] = add nsw i32 [[MUL_EPIL]], [[C_010_UNR]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 1
; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 1
; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA:%.*]], label [[FOR_BODY_EPIL_1:%.*]]
; CHECK: for.cond.cleanup.loopexit.epilog-lcssa:
; CHECK-NEXT: [[ADD_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ], [ [[ADD_EPIL_1:%.*]], [[FOR_BODY_EPIL_1]] ], [ [[ADD_EPIL_2:%.*]], [[FOR_BODY_EPIL_2:%.*]] ]
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_LCSSA_PH1]], [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ]
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[C_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[C_0_LCSSA]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[INDVARS_IV_NEXT_3]], [[FOR_BODY]] ]
; CHECK-NEXT: [[C_010:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3]], [[FOR_BODY]] ]
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], [[C_010]]
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[TMP7]], [[TMP6]]
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[MUL_1]], [[ADD]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4
; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[TMP9]], [[TMP8]]
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[MUL_2]], [[ADD_1]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or i64 [[INDVARS_IV]], 3
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX2_3]], align 4
; CHECK-NEXT: [[MUL_3:%.*]] = mul nsw i32 [[TMP11]], [[TMP10]]
; CHECK-NEXT: [[ADD_3]] = add nsw i32 [[MUL_3]], [[ADD_2]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
; CHECK-NEXT: [[NITER_NSUB_3]] = add i64 [[NITER]], -4
; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NSUB_3]], 0
; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: for.body.epil.1:
; CHECK-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_EPIL]]
; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4
; CHECK-NEXT: [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_EPIL]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_1]], align 4
; CHECK-NEXT: [[MUL_EPIL_1:%.*]] = mul nsw i32 [[TMP13]], [[TMP12]]
; CHECK-NEXT: [[ADD_EPIL_1]] = add nsw i32 [[MUL_EPIL_1]], [[ADD_EPIL]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 2
; CHECK-NEXT: [[EPIL_ITER_CMP_1_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 2
; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]], label [[FOR_BODY_EPIL_2]]
; CHECK: for.body.epil.2:
; CHECK-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4
; CHECK-NEXT: [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_2]], align 4
; CHECK-NEXT: [[MUL_EPIL_2:%.*]] = mul nsw i32 [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[ADD_EPIL_2]] = add nsw i32 [[MUL_EPIL_2]], [[ADD_EPIL_1]]
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
;
entry:
%cmp9 = icmp eq i32 %N, 0
br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph
Expand All @@ -14,50 +111,15 @@ for.cond.cleanup:
%c.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
ret i32 %c.0.lcssa

; CHECK-LABEL: for.body.lr.ph
; CHECK: [[COUNT:%[a-z.0-9]+]] = add nsw i64 %wide.trip.count, -1
; CHECK: %xtraiter = and i64 %wide.trip.count, 3
; CHECK: [[CMP:%[a-z.0-9]+]] = icmp ult i64 [[COUNT]], 3
; CHECK: br i1 [[CMP]], label %[[CLEANUP:.*]], label %for.body.lr.ph.new

; CHECK-LABEL: for.body.lr.ph.new:
; CHECK: %unroll_iter = and i64 %wide.trip.count, 4294967292
; CHECK: br label %for.body

; CHECK: [[CLEANUP]]:
; CHECK: [[MOD:%[a-z.0-9]+]] = icmp eq i64 %xtraiter, 0
; CHECK: br i1 [[MOD]], label %[[EXIT:.*]], label %[[EPIL_PEEL0_PRE:.*]]

; CHECK: [[EPIL_PEEL0_PRE]]:
; CHECK: br label %[[EPIL_PEEL0:.*]]

; CHECK: [[EPIL_PEEL0]]:
; CHECK: [[PEEL_CMP0:%[a-z.0-9]+]] = icmp eq i64 %xtraiter, 1
; CHECK: br i1 [[PEEL_CMP0]], label %[[EPIL_EXIT:.*]], label %[[EPIL_PEEL1:.*]]

; CHECK: [[EPIL_EXIT]]:
; CHECK: br label %[[EXIT]]

; CHECK: [[EXIT]]:
; CHECK: ret i32

; CHECK-LABEL: for.body:
; CHECK: [[INDVAR0:%[a-z.0-9]+]] = phi i64 [ 0, %for.body.lr.ph
; CHECK: [[ITER:%[a-z.0-9]+]] = phi i64 [ %unroll_iter
; CHECK: or i64 [[INDVAR0]], 1
; CHECK: or i64 [[INDVAR0]], 2
; CHECK: or i64 [[INDVAR0]], 3
; CHECK: add nuw nsw i64 [[INDVAR0]], 4
; CHECK: [[SUB:%[a-z.0-9]+]] = add i64 [[ITER]], -4
; CHECK: [[ITER_CMP:%[a-z.0-9]+]] = icmp eq i64 [[SUB]], 0
; CHECK: br i1 [[ITER_CMP]], label %[[LOOP_EXIT:.*]], label %for.body

; CHECK: [[EPIL_PEEL1]]:
; CHECK: [[PEEL_CMP1:%[a-z.0-9]+]] = icmp eq i64 %xtraiter, 2
; CHECK: br i1 [[PEEL_CMP1]], label %[[EPIL_EXIT]], label %[[EPIL_PEEL2:.*]]

; CHECK: [[EPIL_PEEL2]]:
; CHECK: br label %[[EXIT]]

for.body:
%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
Expand Down