885 changes: 513 additions & 372 deletions llvm/test/Transforms/LoopReroll/basic.ll

Large diffs are not rendered by default.

207 changes: 150 additions & 57 deletions llvm/test/Transforms/LoopReroll/extra_instr.ll
Original file line number Diff line number Diff line change
@@ -1,18 +1,33 @@
; RUN: opt -opaque-pointers=0 -S -passes=loop-reroll %s | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -passes=loop-reroll %s | FileCheck %s
target triple = "aarch64--linux-gnu"

define void @rerollable1([2 x i32]* nocapture %a) {
define void @rerollable1(ptr nocapture %a) {
; CHECK-LABEL: define void @rerollable1
; CHECK-SAME: (ptr nocapture [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[IV]], 2
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 160
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[IV]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 80
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[VALUE0:%.*]] = load i32, ptr [[SCEVGEP1]], align 4
; CHECK-NEXT: store i32 [[VALUE0]], ptr [[SCEVGEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp eq i64 [[IV]], 9
; CHECK-NEXT: br i1 [[EXITCOND2]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop

loop:

; CHECK-LABEL: loop:
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 20, i64 %iv
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 10, i64 %iv
; CHECK-NEXT: [[VALUE:%.*]] = load i32, i32* [[SCEVGEP1]], align 4
; CHECK-NEXT: store i32 [[VALUE]], i32* [[SCEVGEP2]], align 4

; base instruction
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
Expand All @@ -24,16 +39,16 @@ loop:
%plus10 = add nuw nsw i64 %iv, 10

; root instruction 0
%ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
%value0 = load i32, i32* %ldptr0, align 4
%stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
store i32 %value0, i32* %stptr0, align 4
%ldptr0 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus20, i64 0
%value0 = load i32, ptr %ldptr0, align 4
%stptr0 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus10, i64 0
store i32 %value0, ptr %stptr0, align 4

; root instruction 1
%ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
%value1 = load i32, i32* %ldptr1, align 4
%stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
store i32 %value1, i32* %stptr1, align 4
%ldptr1 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus20, i64 1
%value1 = load i32, ptr %ldptr1, align 4
%stptr1 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus10, i64 1
store i32 %value1, ptr %stptr1, align 4

; loop-increment
%iv.next = add nuw nsw i64 %iv, 1
Expand All @@ -46,39 +61,59 @@ exit:
ret void
}

define void @unrerollable1([2 x i32]* nocapture %a) {
define void @unrerollable1(ptr nocapture %a) {
; CHECK-LABEL: define void @unrerollable1
; CHECK-SAME: (ptr nocapture [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[STPTRX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[IV]], i64 0
; CHECK-NEXT: store i32 999, ptr [[STPTRX]], align 4
; CHECK-NEXT: [[PLUS20:%.*]] = add nuw nsw i64 [[IV]], 20
; CHECK-NEXT: [[PLUS10:%.*]] = add nuw nsw i64 [[IV]], 10
; CHECK-NEXT: [[LDPTR0:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS20]], i64 0
; CHECK-NEXT: [[VALUE0:%.*]] = load i32, ptr [[LDPTR0]], align 4
; CHECK-NEXT: [[STPTR0:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS10]], i64 0
; CHECK-NEXT: store i32 [[VALUE0]], ptr [[STPTR0]], align 4
; CHECK-NEXT: [[LDPTR1:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS20]], i64 1
; CHECK-NEXT: [[VALUE1:%.*]] = load i32, ptr [[LDPTR1]], align 4
; CHECK-NEXT: [[STPTR1:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS10]], i64 1
; CHECK-NEXT: store i32 [[VALUE1]], ptr [[STPTR1]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 5
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop

loop:

; CHECK-LABEL: loop:
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0
; CHECK-NEXT: store i32 999, i32* %stptrx, align 4

; base instruction
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]

; unrerollable instructions using %iv
%stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0
store i32 999, i32* %stptrx, align 4
%stptrx = getelementptr inbounds [2 x i32], ptr %a, i64 %iv, i64 0
store i32 999, ptr %stptrx, align 4

; extra simple arithmetic operations, used by root instructions
%plus20 = add nuw nsw i64 %iv, 20
%plus10 = add nuw nsw i64 %iv, 10

; root instruction 0
%ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
%value0 = load i32, i32* %ldptr0, align 4
%stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
store i32 %value0, i32* %stptr0, align 4
%ldptr0 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus20, i64 0
%value0 = load i32, ptr %ldptr0, align 4
%stptr0 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus10, i64 0
store i32 %value0, ptr %stptr0, align 4

; root instruction 1
%ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
%value1 = load i32, i32* %ldptr1, align 4
%stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
store i32 %value1, i32* %stptr1, align 4
%ldptr1 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus20, i64 1
%value1 = load i32, ptr %ldptr1, align 4
%stptr1 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus10, i64 1
store i32 %value1, ptr %stptr1, align 4

; loop-increment
%iv.next = add nuw nsw i64 %iv, 1
Expand All @@ -91,17 +126,36 @@ exit:
ret void
}

define void @unrerollable2([2 x i32]* nocapture %a) {
define void @unrerollable2(ptr nocapture %a) {
; CHECK-LABEL: define void @unrerollable2
; CHECK-SAME: (ptr nocapture [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[STPTRX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[IV_NEXT]], i64 0
; CHECK-NEXT: store i32 999, ptr [[STPTRX]], align 4
; CHECK-NEXT: [[PLUS20:%.*]] = add nuw nsw i64 [[IV]], 20
; CHECK-NEXT: [[PLUS10:%.*]] = add nuw nsw i64 [[IV]], 10
; CHECK-NEXT: [[LDPTR0:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS20]], i64 0
; CHECK-NEXT: [[VALUE0:%.*]] = load i32, ptr [[LDPTR0]], align 4
; CHECK-NEXT: [[STPTR0:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS10]], i64 0
; CHECK-NEXT: store i32 [[VALUE0]], ptr [[STPTR0]], align 4
; CHECK-NEXT: [[LDPTR1:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS20]], i64 1
; CHECK-NEXT: [[VALUE1:%.*]] = load i32, ptr [[LDPTR1]], align 4
; CHECK-NEXT: [[STPTR1:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS10]], i64 1
; CHECK-NEXT: store i32 [[VALUE1]], ptr [[STPTR1]], align 4
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 5
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop

loop:

; CHECK-LABEL: loop:
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0
; CHECK-NEXT: store i32 999, i32* %stptrx, align 4

; base instruction
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
Expand All @@ -110,24 +164,24 @@ loop:
%iv.next = add nuw nsw i64 %iv, 1

; unrerollable instructions using %iv.next
%stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0
store i32 999, i32* %stptrx, align 4
%stptrx = getelementptr inbounds [2 x i32], ptr %a, i64 %iv.next, i64 0
store i32 999, ptr %stptrx, align 4

; extra simple arithmetic operations, used by root instructions
%plus20 = add nuw nsw i64 %iv, 20
%plus10 = add nuw nsw i64 %iv, 10

; root instruction 0
%ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
%value0 = load i32, i32* %ldptr0, align 4
%stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
store i32 %value0, i32* %stptr0, align 4
%ldptr0 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus20, i64 0
%value0 = load i32, ptr %ldptr0, align 4
%stptr0 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus10, i64 0
store i32 %value0, ptr %stptr0, align 4

; root instruction 1
%ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
%value1 = load i32, i32* %ldptr1, align 4
%stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
store i32 %value1, i32* %stptr1, align 4
%ldptr1 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus20, i64 1
%value1 = load i32, ptr %ldptr1, align 4
%stptr1 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus10, i64 1
store i32 %value1, ptr %stptr1, align 4

; latch
%exitcond = icmp eq i64 %iv.next, 5
Expand All @@ -138,15 +192,28 @@ exit:
}

define dso_local void @rerollable2() {
; CHECK-LABEL: define dso_local void @rerollable2() {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[IV]], 24
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[IV]], 20
; CHECK-NEXT: [[IV_SCALED_DIV5:%.*]] = udiv i32 [[TMP1]], 5
; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_DIV5]])
; CHECK-NEXT: [[IV_SCALED_ADD4_DIV5:%.*]] = udiv i32 [[TMP0]], 5
; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD4_DIV5]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV]], 8
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop

loop:

; CHECK-LABEL: loop:
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}}
; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}}

; induction variable
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
Expand Down Expand Up @@ -200,17 +267,43 @@ exit:
}

define dso_local void @unrerollable3() {
; CHECK-LABEL: define dso_local void @unrerollable3() {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[IV_MUL3:%.*]] = mul nuw nsw i32 [[IV]], 3
; CHECK-NEXT: [[IV_SCALED:%.*]] = add nuw nsw i32 [[IV_MUL3]], 20
; CHECK-NEXT: [[IV_MUL7:%.*]] = mul nuw nsw i32 [[IV]], 7
; CHECK-NEXT: tail call void @bar(i32 [[IV_MUL7]])
; CHECK-NEXT: [[IV_SCALED_DIV5:%.*]] = udiv i32 [[IV_SCALED]], 5
; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_DIV5]])
; CHECK-NEXT: [[IV_SCALED_ADD1:%.*]] = add nuw nsw i32 [[IV_SCALED]], 1
; CHECK-NEXT: [[IV_SCALED_ADD1_DIV5:%.*]] = udiv i32 [[IV_SCALED_ADD1]], 5
; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD1_DIV5]])
; CHECK-NEXT: [[IV_SCALED_ADD2:%.*]] = add nuw nsw i32 [[IV_SCALED]], 2
; CHECK-NEXT: [[IV_SCALED_ADD2_DIV5:%.*]] = udiv i32 [[IV_SCALED_ADD2]], 5
; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD2_DIV5]])
; CHECK-NEXT: [[IV_SCALED_ADD4:%.*]] = add nuw nsw i32 [[IV_SCALED]], 4
; CHECK-NEXT: [[IV_SCALED_ADD4_DIV5:%.*]] = udiv i32 [[IV_SCALED_ADD4]], 5
; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD4_DIV5]])
; CHECK-NEXT: [[IV_SCALED_ADD5:%.*]] = add nuw nsw i32 [[IV_SCALED]], 5
; CHECK-NEXT: [[IV_SCALED_ADD5_DIV5:%.*]] = udiv i32 [[IV_SCALED_ADD5]], 5
; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD5_DIV5]])
; CHECK-NEXT: [[IV_SCALED_ADD6:%.*]] = add nuw nsw i32 [[IV_SCALED]], 6
; CHECK-NEXT: [[IV_SCALED_ADD6_DIV5:%.*]] = udiv i32 [[IV_SCALED_ADD6]], 5
; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD6_DIV5]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], 3
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop

loop:

; CHECK-LABEL: loop:
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: %iv.mul3 = mul nuw nsw i32 %iv, 3
; CHECK-NEXT: %iv.scaled = add nuw nsw i32 %iv.mul3, 20
; CHECK-NEXT: %iv.mul7 = mul nuw nsw i32 %iv, 7
; CHECK-NEXT: tail call void @bar(i32 %iv.mul7)

; induction variable
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
Expand Down
116 changes: 80 additions & 36 deletions llvm/test/Transforms/LoopReroll/ptrindvar.ll
Original file line number Diff line number Diff line change
@@ -1,34 +1,57 @@
; RUN: opt -opaque-pointers=0 -S -passes=loop-reroll %s | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -passes=loop-reroll %s | FileCheck %s
target triple = "aarch64--linux-gnu"

define i32 @test(i32* readonly %buf, i32* readnone %end) #0 {
define i32 @test(ptr readonly %buf, ptr readnone %end) #0 {
; CHECK-LABEL: define i32 @test
; CHECK-SAME: (ptr readonly [[BUF:%.*]], ptr readnone [[END:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BUF2:%.*]] = ptrtoint ptr [[BUF]] to i64
; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; CHECK-NEXT: [[CMP_9:%.*]] = icmp eq ptr [[BUF]], [[END]]
; CHECK-NEXT: br i1 [[CMP_9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
; CHECK: while.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[BUF2]]
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[S_011:%.*]] = phi i32 [ [[ADD:%.*]], [[WHILE_BODY]] ], [ undef, [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[INDVAR]], 2
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[BUF]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[SCEVGEP]], align 4
; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP6]], [[S_011]]
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR]], [[TMP4]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
; CHECK: while.end.loopexit:
; CHECK-NEXT: [[ADD2_LCSSA:%.*]] = phi i32 [ [[ADD]], [[WHILE_BODY]] ]
; CHECK-NEXT: br label [[WHILE_END]]
; CHECK: while.end:
; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[ADD2_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[S_0_LCSSA]]
;
entry:
%cmp.9 = icmp eq i32* %buf, %end
%cmp.9 = icmp eq ptr %buf, %end
br i1 %cmp.9, label %while.end, label %while.body.preheader

while.body.preheader:
br label %while.body

while.body:
;CHECK-LABEL: while.body:
;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
;CHECK-NEXT: %S.011 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ]
;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %indvar
;CHECK-NEXT: %5 = load i32, i32* %scevgep, align 4
;CHECK-NEXT: %add = add nsw i32 %5, %S.011
;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %4
;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body

%S.011 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ]
%buf.addr.010 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
%0 = load i32, i32* %buf.addr.010, align 4
%buf.addr.010 = phi ptr [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
%0 = load i32, ptr %buf.addr.010, align 4
%add = add nsw i32 %0, %S.011
%arrayidx1 = getelementptr inbounds i32, i32* %buf.addr.010, i64 1
%1 = load i32, i32* %arrayidx1, align 4
%arrayidx1 = getelementptr inbounds i32, ptr %buf.addr.010, i64 1
%1 = load i32, ptr %arrayidx1, align 4
%add2 = add nsw i32 %add, %1
%add.ptr = getelementptr inbounds i32, i32* %buf.addr.010, i64 2
%cmp = icmp eq i32* %add.ptr, %end
%add.ptr = getelementptr inbounds i32, ptr %buf.addr.010, i64 2
%cmp = icmp eq ptr %add.ptr, %end
br i1 %cmp, label %while.end.loopexit, label %while.body

while.end.loopexit:
Expand All @@ -40,35 +63,56 @@ while.end:
ret i32 %S.0.lcssa
}

define i32 @test2(i32* readonly %buf, i32* readnone %end) #0 {
define i32 @test2(ptr readonly %buf, ptr readnone %end) #0 {
; CHECK-LABEL: define i32 @test2
; CHECK-SAME: (ptr readonly [[BUF:%.*]], ptr readnone [[END:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[END2:%.*]] = ptrtoint ptr [[END]] to i64
; CHECK-NEXT: [[BUF1:%.*]] = ptrtoint ptr [[BUF]] to i64
; CHECK-NEXT: [[CMP_9:%.*]] = icmp eq ptr [[BUF]], [[END]]
; CHECK-NEXT: br i1 [[CMP_9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
; CHECK: while.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[BUF1]], -8
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[END2]]
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[S_011:%.*]] = phi i32 [ [[ADD:%.*]], [[WHILE_BODY]] ], [ undef, [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i64 [[INDVAR]], -4
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[BUF]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[SCEVGEP]], align 4
; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP6]], [[S_011]]
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR]], [[TMP4]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
; CHECK: while.end.loopexit:
; CHECK-NEXT: [[ADD2_LCSSA:%.*]] = phi i32 [ [[ADD]], [[WHILE_BODY]] ]
; CHECK-NEXT: br label [[WHILE_END]]
; CHECK: while.end:
; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[ADD2_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[S_0_LCSSA]]
;
entry:
%cmp.9 = icmp eq i32* %buf, %end
%cmp.9 = icmp eq ptr %buf, %end
br i1 %cmp.9, label %while.end, label %while.body.preheader

while.body.preheader:
br label %while.body

while.body:
;CHECK-LABEL: while.body:
;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
;CHECK-NEXT: %S.011 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ]
;CHECK-NEXT: %5 = mul nsw i64 %indvar, -1
;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %5
;CHECK-NEXT: %6 = load i32, i32* %scevgep, align 4
;CHECK-NEXT: %add = add nsw i32 %6, %S.011
;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %4
;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body

%S.011 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ]
%buf.addr.010 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
%0 = load i32, i32* %buf.addr.010, align 4
%buf.addr.010 = phi ptr [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
%0 = load i32, ptr %buf.addr.010, align 4
%add = add nsw i32 %0, %S.011
%arrayidx1 = getelementptr inbounds i32, i32* %buf.addr.010, i64 -1
%1 = load i32, i32* %arrayidx1, align 4
%arrayidx1 = getelementptr inbounds i32, ptr %buf.addr.010, i64 -1
%1 = load i32, ptr %arrayidx1, align 4
%add2 = add nsw i32 %add, %1
%add.ptr = getelementptr inbounds i32, i32* %buf.addr.010, i64 -2
%cmp = icmp eq i32* %add.ptr, %end
%add.ptr = getelementptr inbounds i32, ptr %buf.addr.010, i64 -2
%cmp = icmp eq ptr %add.ptr, %end
br i1 %cmp, label %while.end.loopexit, label %while.body

while.end.loopexit:
Expand Down