@@ -1,18 +1,33 @@
; RUN: opt -opaque-pointers=0 -S -passes=loop-reroll %s | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -passes=loop-reroll %s | FileCheck %s
target triple = "aarch64--linux-gnu"
define void @rerollable1 ([2 x i32 ]* nocapture %a ) {
define void @rerollable1 (ptr nocapture %a ) {
; CHECK-LABEL: define void @rerollable1
; CHECK-SAME: (ptr nocapture [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[IV]], 2
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 160
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[IV]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 80
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[VALUE0:%.*]] = load i32, ptr [[SCEVGEP1]], align 4
; CHECK-NEXT: store i32 [[VALUE0]], ptr [[SCEVGEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp eq i64 [[IV]], 9
; CHECK-NEXT: br i1 [[EXITCOND2]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
; CHECK-LABEL: loop:
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 20, i64 %iv
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 10, i64 %iv
; CHECK-NEXT: [[VALUE:%.*]] = load i32, i32* [[SCEVGEP1]], align 4
; CHECK-NEXT: store i32 [[VALUE]], i32* [[SCEVGEP2]], align 4
; base instruction
%iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
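; [diff view] unchanged lines collapsed here (per the hunk headers: old lines 19-23 / new lines 34-38 are not shown)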
@@ -24,16 +39,16 @@ loop:
%plus10 = add nuw nsw i64 %iv , 10
; root instruction 0
%ldptr0 = getelementptr inbounds [2 x i32 ], [ 2 x i32 ]* %a , i64 %plus20 , i64 0
%value0 = load i32 , i32* %ldptr0 , align 4
%stptr0 = getelementptr inbounds [2 x i32 ], [ 2 x i32 ]* %a , i64 %plus10 , i64 0
store i32 %value0 , i32* %stptr0 , align 4
%ldptr0 = getelementptr inbounds [2 x i32 ], ptr %a , i64 %plus20 , i64 0
%value0 = load i32 , ptr %ldptr0 , align 4
%stptr0 = getelementptr inbounds [2 x i32 ], ptr %a , i64 %plus10 , i64 0
store i32 %value0 , ptr %stptr0 , align 4
; root instruction 1
%ldptr1 = getelementptr inbounds [2 x i32 ], [ 2 x i32 ]* %a , i64 %plus20 , i64 1
%value1 = load i32 , i32* %ldptr1 , align 4
%stptr1 = getelementptr inbounds [2 x i32 ], [ 2 x i32 ]* %a , i64 %plus10 , i64 1
store i32 %value1 , i32* %stptr1 , align 4
%ldptr1 = getelementptr inbounds [2 x i32 ], ptr %a , i64 %plus20 , i64 1
%value1 = load i32 , ptr %ldptr1 , align 4
%stptr1 = getelementptr inbounds [2 x i32 ], ptr %a , i64 %plus10 , i64 1
store i32 %value1 , ptr %stptr1 , align 4
; loop-increment
%iv.next = add nuw nsw i64 %iv , 1
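; [diff view] unchanged lines collapsed here (per the hunk headers: old lines 40-45 / new lines 55-60 are not shown)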
@@ -46,39 +61,59 @@ exit:
ret void
}
define void @unrerollable1 ([2 x i32 ]* nocapture %a ) {
define void @unrerollable1 (ptr nocapture %a ) {
; CHECK-LABEL: define void @unrerollable1
; CHECK-SAME: (ptr nocapture [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[STPTRX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[IV]], i64 0
; CHECK-NEXT: store i32 999, ptr [[STPTRX]], align 4
; CHECK-NEXT: [[PLUS20:%.*]] = add nuw nsw i64 [[IV]], 20
; CHECK-NEXT: [[PLUS10:%.*]] = add nuw nsw i64 [[IV]], 10
; CHECK-NEXT: [[LDPTR0:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS20]], i64 0
; CHECK-NEXT: [[VALUE0:%.*]] = load i32, ptr [[LDPTR0]], align 4
; CHECK-NEXT: [[STPTR0:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS10]], i64 0
; CHECK-NEXT: store i32 [[VALUE0]], ptr [[STPTR0]], align 4
; CHECK-NEXT: [[LDPTR1:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS20]], i64 1
; CHECK-NEXT: [[VALUE1:%.*]] = load i32, ptr [[LDPTR1]], align 4
; CHECK-NEXT: [[STPTR1:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS10]], i64 1
; CHECK-NEXT: store i32 [[VALUE1]], ptr [[STPTR1]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 5
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
; CHECK-LABEL: loop:
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0
; CHECK-NEXT: store i32 999, i32* %stptrx, align 4
; base instruction
%iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
; unrerollable instructions using %iv
%stptrx = getelementptr inbounds [2 x i32 ], [ 2 x i32 ]* %a , i64 %iv , i64 0
store i32 999 , i32* %stptrx , align 4
%stptrx = getelementptr inbounds [2 x i32 ], ptr %a , i64 %iv , i64 0
store i32 999 , ptr %stptrx , align 4
; extra simple arithmetic operations, used by root instructions
%plus20 = add nuw nsw i64 %iv , 20
%plus10 = add nuw nsw i64 %iv , 10
; root instruction 0
%ldptr0 = getelementptr inbounds [2 x i32 ], [ 2 x i32 ]* %a , i64 %plus20 , i64 0
%value0 = load i32 , i32* %ldptr0 , align 4
%stptr0 = getelementptr inbounds [2 x i32 ], [ 2 x i32 ]* %a , i64 %plus10 , i64 0
store i32 %value0 , i32* %stptr0 , align 4
%ldptr0 = getelementptr inbounds [2 x i32 ], ptr %a , i64 %plus20 , i64 0
%value0 = load i32 , ptr %ldptr0 , align 4
%stptr0 = getelementptr inbounds [2 x i32 ], ptr %a , i64 %plus10 , i64 0
store i32 %value0 , ptr %stptr0 , align 4
; root instruction 1
%ldptr1 = getelementptr inbounds [2 x i32 ], [ 2 x i32 ]* %a , i64 %plus20 , i64 1
%value1 = load i32 , i32* %ldptr1 , align 4
%stptr1 = getelementptr inbounds [2 x i32 ], [ 2 x i32 ]* %a , i64 %plus10 , i64 1
store i32 %value1 , i32* %stptr1 , align 4
%ldptr1 = getelementptr inbounds [2 x i32 ], ptr %a , i64 %plus20 , i64 1
%value1 = load i32 , ptr %ldptr1 , align 4
%stptr1 = getelementptr inbounds [2 x i32 ], ptr %a , i64 %plus10 , i64 1
store i32 %value1 , ptr %stptr1 , align 4
; loop-increment
%iv.next = add nuw nsw i64 %iv , 1
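; [diff view] unchanged lines collapsed here (per the hunk headers: old lines 85-90 / new lines 120-125 are not shown)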
@@ -91,17 +126,36 @@ exit:
ret void
}
define void @unrerollable2 ([2 x i32 ]* nocapture %a ) {
define void @unrerollable2 (ptr nocapture %a ) {
; CHECK-LABEL: define void @unrerollable2
; CHECK-SAME: (ptr nocapture [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[STPTRX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[IV_NEXT]], i64 0
; CHECK-NEXT: store i32 999, ptr [[STPTRX]], align 4
; CHECK-NEXT: [[PLUS20:%.*]] = add nuw nsw i64 [[IV]], 20
; CHECK-NEXT: [[PLUS10:%.*]] = add nuw nsw i64 [[IV]], 10
; CHECK-NEXT: [[LDPTR0:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS20]], i64 0
; CHECK-NEXT: [[VALUE0:%.*]] = load i32, ptr [[LDPTR0]], align 4
; CHECK-NEXT: [[STPTR0:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS10]], i64 0
; CHECK-NEXT: store i32 [[VALUE0]], ptr [[STPTR0]], align 4
; CHECK-NEXT: [[LDPTR1:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS20]], i64 1
; CHECK-NEXT: [[VALUE1:%.*]] = load i32, ptr [[LDPTR1]], align 4
; CHECK-NEXT: [[STPTR1:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS10]], i64 1
; CHECK-NEXT: store i32 [[VALUE1]], ptr [[STPTR1]], align 4
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 5
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
; CHECK-LABEL: loop:
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0
; CHECK-NEXT: store i32 999, i32* %stptrx, align 4
; base instruction
%iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
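; [diff view] unchanged lines collapsed here (per the hunk headers: old lines 108-109 / new lines 162-163 are not shown)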
@@ -110,24 +164,24 @@ loop:
%iv.next = add nuw nsw i64 %iv , 1
; unrerollable instructions using %iv.next
%stptrx = getelementptr inbounds [2 x i32 ], [ 2 x i32 ]* %a , i64 %iv.next , i64 0
store i32 999 , i32* %stptrx , align 4
%stptrx = getelementptr inbounds [2 x i32 ], ptr %a , i64 %iv.next , i64 0
store i32 999 , ptr %stptrx , align 4
; extra simple arithmetic operations, used by root instructions
%plus20 = add nuw nsw i64 %iv , 20
%plus10 = add nuw nsw i64 %iv , 10
; root instruction 0
%ldptr0 = getelementptr inbounds [2 x i32 ], [ 2 x i32 ]* %a , i64 %plus20 , i64 0
%value0 = load i32 , i32* %ldptr0 , align 4
%stptr0 = getelementptr inbounds [2 x i32 ], [ 2 x i32 ]* %a , i64 %plus10 , i64 0
store i32 %value0 , i32* %stptr0 , align 4
%ldptr0 = getelementptr inbounds [2 x i32 ], ptr %a , i64 %plus20 , i64 0
%value0 = load i32 , ptr %ldptr0 , align 4
%stptr0 = getelementptr inbounds [2 x i32 ], ptr %a , i64 %plus10 , i64 0
store i32 %value0 , ptr %stptr0 , align 4
; root instruction 1
%ldptr1 = getelementptr inbounds [2 x i32 ], [ 2 x i32 ]* %a , i64 %plus20 , i64 1
%value1 = load i32 , i32* %ldptr1 , align 4
%stptr1 = getelementptr inbounds [2 x i32 ], [ 2 x i32 ]* %a , i64 %plus10 , i64 1
store i32 %value1 , i32* %stptr1 , align 4
%ldptr1 = getelementptr inbounds [2 x i32 ], ptr %a , i64 %plus20 , i64 1
%value1 = load i32 , ptr %ldptr1 , align 4
%stptr1 = getelementptr inbounds [2 x i32 ], ptr %a , i64 %plus10 , i64 1
store i32 %value1 , ptr %stptr1 , align 4
; latch
%exitcond = icmp eq i64 %iv.next , 5
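; [diff view] unchanged lines collapsed here (per the hunk headers: old lines 134-137 / new lines 188-191 are not shown)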
@@ -138,15 +192,28 @@ exit:
}
define dso_local void @rerollable2 () {
; CHECK-LABEL: define dso_local void @rerollable2() {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[IV]], 24
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[IV]], 20
; CHECK-NEXT: [[IV_SCALED_DIV5:%.*]] = udiv i32 [[TMP1]], 5
; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_DIV5]])
; CHECK-NEXT: [[IV_SCALED_ADD4_DIV5:%.*]] = udiv i32 [[TMP0]], 5
; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD4_DIV5]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV]], 8
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
; CHECK-LABEL: loop:
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}}
; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}}
; induction variable
%iv = phi i32 [ 0 , %entry ], [ %iv.next , %loop ]
; [diff view] unchanged lines collapsed here (per the hunk headers: old lines 153-199 / new lines 220-266 are not shown)
@@ -200,17 +267,43 @@ exit:
}
define dso_local void @unrerollable3 () {
; CHECK-LABEL: define dso_local void @unrerollable3() {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[IV_MUL3:%.*]] = mul nuw nsw i32 [[IV]], 3
; CHECK-NEXT: [[IV_SCALED:%.*]] = add nuw nsw i32 [[IV_MUL3]], 20
; CHECK-NEXT: [[IV_MUL7:%.*]] = mul nuw nsw i32 [[IV]], 7
; CHECK-NEXT: tail call void @bar(i32 [[IV_MUL7]])
; CHECK-NEXT: [[IV_SCALED_DIV5:%.*]] = udiv i32 [[IV_SCALED]], 5
; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_DIV5]])
; CHECK-NEXT: [[IV_SCALED_ADD1:%.*]] = add nuw nsw i32 [[IV_SCALED]], 1
; CHECK-NEXT: [[IV_SCALED_ADD1_DIV5:%.*]] = udiv i32 [[IV_SCALED_ADD1]], 5
; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD1_DIV5]])
; CHECK-NEXT: [[IV_SCALED_ADD2:%.*]] = add nuw nsw i32 [[IV_SCALED]], 2
; CHECK-NEXT: [[IV_SCALED_ADD2_DIV5:%.*]] = udiv i32 [[IV_SCALED_ADD2]], 5
; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD2_DIV5]])
; CHECK-NEXT: [[IV_SCALED_ADD4:%.*]] = add nuw nsw i32 [[IV_SCALED]], 4
; CHECK-NEXT: [[IV_SCALED_ADD4_DIV5:%.*]] = udiv i32 [[IV_SCALED_ADD4]], 5
; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD4_DIV5]])
; CHECK-NEXT: [[IV_SCALED_ADD5:%.*]] = add nuw nsw i32 [[IV_SCALED]], 5
; CHECK-NEXT: [[IV_SCALED_ADD5_DIV5:%.*]] = udiv i32 [[IV_SCALED_ADD5]], 5
; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD5_DIV5]])
; CHECK-NEXT: [[IV_SCALED_ADD6:%.*]] = add nuw nsw i32 [[IV_SCALED]], 6
; CHECK-NEXT: [[IV_SCALED_ADD6_DIV5:%.*]] = udiv i32 [[IV_SCALED_ADD6]], 5
; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD6_DIV5]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], 3
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
; CHECK-LABEL: loop:
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: %iv.mul3 = mul nuw nsw i32 %iv, 3
; CHECK-NEXT: %iv.scaled = add nuw nsw i32 %iv.mul3, 20
; CHECK-NEXT: %iv.mul7 = mul nuw nsw i32 %iv, 7
; CHECK-NEXT: tail call void @bar(i32 %iv.mul7)
; induction variable
%iv = phi i32 [ 0 , %entry ], [ %iv.next , %loop ]
; [diff view] the lines following this hunk are collapsed here and not shown