32 changes: 14 additions & 18 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,43 +6,39 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: cmp r1, #4
; CHECK-NEXT: it ge
; CHECK-NEXT: movge r3, #4
; CHECK-NEXT: mov.w r12, #1
; CHECK-NEXT: subs r3, r1, r3
; CHECK-NEXT: movge r4, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: subs r4, r1, r4
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: adds r3, #3
; CHECK-NEXT: add.w lr, r12, r3, lsr #2
; CHECK-NEXT: adds r4, #3
; CHECK-NEXT: add.w r12, r3, r4, lsr #2
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: mov r12, r0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: mov r4, lr
; CHECK-NEXT: dlstp.32 lr, r3
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: .LBB0_1: @ %do.body.i
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r3
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrwt.u32 q1, [r12], #16
; CHECK-NEXT: vaddt.f32 q0, q0, q1
; CHECK-NEXT: le lr, .LBB0_1
; CHECK-NEXT: vldrw.u32 q1, [r4], #16
; CHECK-NEXT: vadd.f32 q0, q0, q1
; CHECK-NEXT: letp lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit
; CHECK-NEXT: vmov s4, r1
; CHECK-NEXT: dls lr, r12
; CHECK-NEXT: vadd.f32 s0, s3, s3
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: vcvt.f32.u32 s4, s4
; CHECK-NEXT: dls lr, r4
; CHECK-NEXT: vdiv.f32 s0, s0, s4
; CHECK-NEXT: vmov r12, s0
; CHECK-NEXT: vmov r4, s0
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: .LBB0_3: @ %do.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r3
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: vpsttt
; CHECK-NEXT: vldrwt.u32 q1, [r0], #16
; CHECK-NEXT: vsubt.f32 q1, q1, r12
; CHECK-NEXT: vsubt.f32 q1, q1, r4
; CHECK-NEXT: vfmat.f32 q0, q1, q1
; CHECK-NEXT: le lr, .LBB0_3
; CHECK-NEXT: @ %bb.4: @ %do.end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
br i1 %cmp9, label %for.cond.cleanup, label %vector.ph

vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %13, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %div, %vector.ph ], [ %9, %vector.body ]
%lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>*
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
Expand All @@ -50,7 +50,7 @@
}
declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>) #1
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4

Expand Down Expand Up @@ -169,7 +169,7 @@ body: |
renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg
$r12 = t2MOVr killed $r3, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
renamable $r3, dead $cpsr = tMOVi8 0, 14, $noreg
renamable $r12 = t2LSRri killed renamable $r12, 1, 14, $noreg, $noreg
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
br i1 %cmp9, label %for.cond.cleanup, label %vector.ph

vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %13, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %div, %vector.ph ], [ %9, %vector.body ]
%lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>*
%8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7)
Expand All @@ -50,7 +50,7 @@
}
declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>) #1
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4

Expand Down Expand Up @@ -168,7 +168,7 @@ body: |
renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg
renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
$r12 = t2MOVr killed $r3, 14, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 0, 14, $noreg
renamable $r12 = t2LSRri killed renamable $r12, 1, 14, $noreg, $noreg
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,17 @@
br i1 %26, label %49, label %31

31: ; preds = %23
call void @llvm.set.loop.iterations.i32(i32 %30)
%start1 = call i32 @llvm.start.loop.iterations.i32(i32 %30)
br label %65

32: ; preds = %11
call void @llvm.set.loop.iterations.i32(i32 %22)
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %22)
br label %33

33: ; preds = %33, %32
%34 = phi i32* [ %46, %33 ], [ %0, %32 ]
%35 = phi i32* [ %45, %33 ], [ %1, %32 ]
%36 = phi i32 [ %22, %32 ], [ %47, %33 ]
%36 = phi i32 [ %start2, %32 ], [ %47, %33 ]
%37 = phi i32 [ %9, %32 ], [ %41, %33 ]
%38 = bitcast i32* %34 to <4 x i32>*
%39 = bitcast i32* %35 to <4 x i32>*
Expand Down Expand Up @@ -89,7 +89,7 @@
65: ; preds = %65, %31
%66 = phi i32 [ %108, %65 ], [ 0, %31 ]
%67 = phi i32 [ 0, %31 ], [ %107, %65 ]
%68 = phi i32 [ %30, %31 ], [ %109, %65 ]
%68 = phi i32 [ %start1, %31 ], [ %109, %65 ]
%69 = bitcast i32* %0 to i8*
%70 = bitcast i32* %1 to i8*
%71 = getelementptr i8, i8* %70, i32 %66
Expand Down Expand Up @@ -141,7 +141,7 @@

declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #1
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4

Expand Down Expand Up @@ -353,7 +353,7 @@ body: |
renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg
$r2 = tMOVr $r0, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.3 (%ir-block.33):
successors: %bb.3(0x7c000000), %bb.4(0x04000000)
Expand Down Expand Up @@ -402,7 +402,7 @@ body: |
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14, $noreg, $noreg
renamable $r2, dead $cpsr = tMOVi8 0, 14, $noreg
renamable $r3, dead $cpsr = tMOVi8 0, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.8 (%ir-block.65):
successors: %bb.8(0x7c000000), %bb.9(0x04000000)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
br i1 %10, label %34, label %17

17: ; preds = %4
call void @llvm.set.loop.iterations.i32(i32 %16)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %16)
br label %18

18: ; preds = %18, %17
%19 = phi i32* [ %31, %18 ], [ %0, %17 ]
%20 = phi i32* [ %30, %18 ], [ %1, %17 ]
%21 = phi i32 [ %16, %17 ], [ %32, %18 ]
%21 = phi i32 [ %start, %17 ], [ %32, %18 ]
%22 = phi i32 [ %9, %17 ], [ %26, %18 ]
%23 = bitcast i32* %19 to <4 x i32>*
%24 = bitcast i32* %20 to <4 x i32>*
Expand All @@ -45,7 +45,7 @@
}
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)

Expand Down Expand Up @@ -143,7 +143,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg
$r3 = tMOVr $r0, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2 (%ir-block.18):
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
br label %for.body

for.cond.cleanup: ; preds = %for.end, %entry
Expand All @@ -18,7 +18,7 @@
%lsr.iv4 = phi i32* [ %b, %for.body.preheader ], [ %scevgep5, %for.end ]
%lsr.iv2 = phi i32* [ %c, %for.body.preheader ], [ %scevgep3, %for.end ]
%lsr.iv1 = phi i32* [ %a, %for.body.preheader ], [ %scevgep, %for.end ]
%lsr.iv = phi i32 [ %N, %for.body.preheader ], [ %lsr.iv.next, %for.end ]
%lsr.iv = phi i32 [ %start, %for.body.preheader ], [ %lsr.iv.next, %for.end ]
%size = call i32 @llvm.arm.space(i32 3072, i32 undef)
%0 = load i32, i32* %lsr.iv4, align 4
%1 = load i32, i32* %lsr.iv2, align 4
Expand Down Expand Up @@ -46,7 +46,7 @@
declare i32 @llvm.arm.space(i32 immarg, i32) #0

; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #1
declare i32 @llvm.start.loop.iterations.i32(i32) #1

; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
Expand Down Expand Up @@ -166,7 +166,7 @@ body: |
liveins: $r0, $r1, $r2, $r3, $r4, $lr
$lr = tMOVr $r3, 14, $noreg
t2DoLoopStart killed $r3
$lr = t2DoLoopStart killed $r3
tB %bb.2, 14, $noreg
bb.2.for.end:
Expand Down
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiple-do-loops.mir
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@
br i1 %cmp30, label %for.cond.cleanup6, label %vector.ph

vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start1 = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv68 = phi i32* [ %scevgep69, %vector.body ], [ %a, %vector.ph ]
%lsr.iv65 = phi i32* [ %scevgep66, %vector.body ], [ %c, %vector.ph ]
%lsr.iv62 = phi i32* [ %scevgep63, %vector.body ], [ %b, %vector.ph ]
%6 = phi i32 [ %5, %vector.ph ], [ %11, %vector.body ]
%6 = phi i32 [ %start1, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv6870 = bitcast i32* %lsr.iv68 to <4 x i32>*
%lsr.iv6567 = bitcast i32* %lsr.iv65 to <4 x i32>*
Expand Down Expand Up @@ -50,14 +50,14 @@
br i1 %13, label %for.cond.cleanup6, label %vector.ph39

vector.ph39: ; preds = %for.cond4.preheader
call void @llvm.set.loop.iterations.i32(i32 %19)
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %19)
br label %vector.body38

vector.body38: ; preds = %vector.body38, %vector.ph39
%lsr.iv59 = phi i32* [ %scevgep60, %vector.body38 ], [ %a, %vector.ph39 ]
%lsr.iv56 = phi i32* [ %scevgep57, %vector.body38 ], [ %c, %vector.ph39 ]
%lsr.iv = phi i32* [ %scevgep, %vector.body38 ], [ %b, %vector.ph39 ]
%20 = phi i32 [ %19, %vector.ph39 ], [ %26, %vector.body38 ]
%20 = phi i32 [ %start2, %vector.ph39 ], [ %26, %vector.body38 ]
%21 = phi i32 [ %N, %vector.ph39 ], [ %23, %vector.body38 ]
%lsr.iv5961 = bitcast i32* %lsr.iv59 to <4 x i32>*
%lsr.iv5658 = bitcast i32* %lsr.iv56 to <4 x i32>*
Expand Down Expand Up @@ -94,14 +94,14 @@
br i1 %cmp30, label %for.cond4.preheader, label %vector.ph

vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start1 = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv68 = phi i32* [ %scevgep69, %vector.body ], [ %a, %vector.ph ]
%lsr.iv65 = phi i32* [ %scevgep66, %vector.body ], [ %c, %vector.ph ]
%lsr.iv62 = phi i32* [ %scevgep63, %vector.body ], [ %b, %vector.ph ]
%6 = phi i32 [ %5, %vector.ph ], [ %11, %vector.body ]
%6 = phi i32 [ %start1, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %div, %vector.ph ], [ %9, %vector.body ]
%lsr.iv6870 = bitcast i32* %lsr.iv68 to <4 x i32>*
%lsr.iv6567 = bitcast i32* %lsr.iv65 to <4 x i32>*
Expand Down Expand Up @@ -130,14 +130,14 @@
br i1 %cmp528, label %for.cond.cleanup6, label %vector.ph39

vector.ph39: ; preds = %for.cond4.preheader
call void @llvm.set.loop.iterations.i32(i32 %18)
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %18)
br label %vector.body38

vector.body38: ; preds = %vector.body38, %vector.ph39
%lsr.iv59 = phi i32* [ %scevgep60, %vector.body38 ], [ %a, %vector.ph39 ]
%lsr.iv56 = phi i32* [ %scevgep57, %vector.body38 ], [ %c, %vector.ph39 ]
%lsr.iv = phi i32* [ %scevgep, %vector.body38 ], [ %b, %vector.ph39 ]
%19 = phi i32 [ %18, %vector.ph39 ], [ %25, %vector.body38 ]
%19 = phi i32 [ %start2, %vector.ph39 ], [ %25, %vector.body38 ]
%20 = phi i32 [ %N, %vector.ph39 ], [ %22, %vector.body38 ]
%lsr.iv5961 = bitcast i32* %lsr.iv59 to <4 x i32>*
%lsr.iv5658 = bitcast i32* %lsr.iv56 to <4 x i32>*
Expand Down Expand Up @@ -173,14 +173,14 @@
br i1 %cmp54, label %for.cond.cleanup17, label %vector.ph

vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start1 = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv123 = phi i32* [ %scevgep124, %vector.body ], [ %a, %vector.ph ]
%lsr.iv120 = phi i32* [ %scevgep121, %vector.body ], [ %c, %vector.ph ]
%lsr.iv117 = phi i32* [ %scevgep118, %vector.body ], [ %b, %vector.ph ]
%6 = phi i32 [ %5, %vector.ph ], [ %11, %vector.body ]
%6 = phi i32 [ %start1, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv123125 = bitcast i32* %lsr.iv123 to <4 x i32>*
%lsr.iv120122 = bitcast i32* %lsr.iv120 to <4 x i32>*
Expand Down Expand Up @@ -210,14 +210,14 @@
br i1 %cmp552, label %for.cond15.preheader, label %vector.ph66

vector.ph66: ; preds = %for.cond4.preheader
call void @llvm.set.loop.iterations.i32(i32 %18)
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %18)
br label %vector.body65

vector.body65: ; preds = %vector.body65, %vector.ph66
%lsr.iv114 = phi i32* [ %scevgep115, %vector.body65 ], [ %a, %vector.ph66 ]
%lsr.iv111 = phi i32* [ %scevgep112, %vector.body65 ], [ %c, %vector.ph66 ]
%lsr.iv108 = phi i32* [ %scevgep109, %vector.body65 ], [ %b, %vector.ph66 ]
%19 = phi i32 [ %18, %vector.ph66 ], [ %25, %vector.body65 ]
%19 = phi i32 [ %start2, %vector.ph66 ], [ %25, %vector.body65 ]
%20 = phi i32 [ %div, %vector.ph66 ], [ %22, %vector.body65 ]
%lsr.iv114116 = bitcast i32* %lsr.iv114 to <4 x i32>*
%lsr.iv111113 = bitcast i32* %lsr.iv111 to <4 x i32>*
Expand Down Expand Up @@ -248,14 +248,14 @@
br i1 %27, label %for.cond.cleanup17, label %vector.ph85

vector.ph85: ; preds = %for.cond15.preheader
call void @llvm.set.loop.iterations.i32(i32 %33)
%start3 = call i32 @llvm.start.loop.iterations.i32(i32 %33)
br label %vector.body84

vector.body84: ; preds = %vector.body84, %vector.ph85
%lsr.iv105 = phi i32* [ %scevgep106, %vector.body84 ], [ %a, %vector.ph85 ]
%lsr.iv102 = phi i32* [ %scevgep103, %vector.body84 ], [ %c, %vector.ph85 ]
%lsr.iv = phi i32* [ %scevgep, %vector.body84 ], [ %b, %vector.ph85 ]
%34 = phi i32 [ %33, %vector.ph85 ], [ %40, %vector.body84 ]
%34 = phi i32 [ %start3, %vector.ph85 ], [ %40, %vector.body84 ]
%35 = phi i32 [ %N, %vector.ph85 ], [ %37, %vector.body84 ]
%lsr.iv105107 = bitcast i32* %lsr.iv105 to <4 x i32>*
%lsr.iv102104 = bitcast i32* %lsr.iv102 to <4 x i32>*
Expand All @@ -280,7 +280,7 @@
}
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)

Expand Down Expand Up @@ -431,7 +431,7 @@ body: |
$r4 = tMOVr $r3, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r6, renamable $r12, 19, 14, $noreg, $noreg
$r6 = tMOVr $r1, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down Expand Up @@ -462,7 +462,7 @@ body: |
renamable $r6, dead $cpsr = tMOVi8 1, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r6, killed renamable $r12, 19, 14, $noreg, $noreg
$r12 = tMOVr $r0, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.5.vector.body38:
successors: %bb.5(0x7c000000), %bb.6(0x04000000)
Expand Down Expand Up @@ -637,7 +637,7 @@ body: |
renamable $r6, dead $cpsr = tSUBi8 killed renamable $r6, 4, 14, $noreg
renamable $lr = nuw nsw t2ADDrs renamable $r12, killed renamable $r6, 19, 14, $noreg, $noreg
$r6 = tMOVr $r2, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down Expand Up @@ -670,7 +670,7 @@ body: |
renamable $r6 = t2BICri killed renamable $r6, 3, 14, $noreg, $noreg
renamable $r6, dead $cpsr = tSUBi8 killed renamable $r6, 4, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r6, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.5.vector.body38:
successors: %bb.5(0x7c000000), %bb.6(0x04000000)
Expand Down Expand Up @@ -878,7 +878,7 @@ body: |
$r4 = tMOVr $r3, 14, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r6, renamable $r12, 19, 14, $noreg, $noreg
$r6 = tMOVr $r1, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down Expand Up @@ -919,7 +919,7 @@ body: |
$r4 = tMOVr $r1, 14, $noreg
renamable $lr = nuw nsw t2ADDrs renamable $r8, killed renamable $r6, 19, 14, $noreg, $noreg
$r6 = tMOVr $r0, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.5.vector.body65:
successors: %bb.5(0x7c000000), %bb.6(0x04000000)
Expand Down Expand Up @@ -952,7 +952,7 @@ body: |
renamable $lr = nuw nsw t2ADDrs killed renamable $r8, killed renamable $r12, 19, 14, $noreg, $noreg
$r5 = tMOVr $r0, 14, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.8.vector.body84:
successors: %bb.8(0x7c000000), %bb.9(0x04000000)
Expand Down
68 changes: 34 additions & 34 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,9 @@ define arm_aapcs_vfpcc void @float_float_mul(float* nocapture readonly %a, float
; CHECK-NEXT: sub.w r7, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: add.w lr, r6, r7, lsr #2
; CHECK-NEXT: add.w r7, r6, r7, lsr #2
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r7
; CHECK-NEXT: .LBB0_12: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r5], #16
Expand Down Expand Up @@ -311,9 +311,9 @@ define arm_aapcs_vfpcc void @float_float_add(float* nocapture readonly %a, float
; CHECK-NEXT: sub.w r7, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: add.w lr, r6, r7, lsr #2
; CHECK-NEXT: add.w r7, r6, r7, lsr #2
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r7
; CHECK-NEXT: .LBB1_12: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r5], #16
Expand Down Expand Up @@ -530,9 +530,9 @@ define arm_aapcs_vfpcc void @float_float_sub(float* nocapture readonly %a, float
; CHECK-NEXT: sub.w r7, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: add.w lr, r6, r7, lsr #2
; CHECK-NEXT: add.w r7, r6, r7, lsr #2
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r7
; CHECK-NEXT: .LBB2_12: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r5], #16
Expand Down Expand Up @@ -680,9 +680,9 @@ define arm_aapcs_vfpcc void @float_int_mul(float* nocapture readonly %a, i32* no
; CHECK-NEXT: sub.w r7, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: add.w lr, r6, r7, lsr #2
; CHECK-NEXT: add.w r7, r6, r7, lsr #2
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r7
; CHECK-NEXT: .LBB3_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r5], #16
Expand Down Expand Up @@ -889,10 +889,10 @@ define arm_aapcs_vfpcc void @float_int_int_mul(i32* nocapture readonly %a, i32*
; CHECK-NEXT: movs r5, #1
; CHECK-NEXT: sub.w r6, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
; CHECK-NEXT: add.w r6, r5, r6, lsr #2
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB4_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r4], #16
Expand All @@ -906,11 +906,11 @@ define arm_aapcs_vfpcc void @float_int_int_mul(i32* nocapture readonly %a, i32*
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r4, r5, r6, pc}
; CHECK-NEXT: .LBB4_6: @ %for.body.preheader11
; CHECK-NEXT: sub.w lr, r3, r12
; CHECK-NEXT: sub.w r3, r3, r12
; CHECK-NEXT: add.w r0, r0, r12, lsl #2
; CHECK-NEXT: add.w r1, r1, r12, lsl #2
; CHECK-NEXT: add.w r2, r2, r12, lsl #2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB4_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r3, [r0], #4
Expand Down Expand Up @@ -994,10 +994,10 @@ define arm_aapcs_vfpcc void @half_half_mul(half* nocapture readonly %a, half* no
; CHECK-NEXT: movs r5, #1
; CHECK-NEXT: sub.w r6, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
; CHECK-NEXT: add.w r6, r5, r6, lsr #2
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB5_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr.w r9, [r4]
Expand All @@ -1021,11 +1021,11 @@ define arm_aapcs_vfpcc void @half_half_mul(half* nocapture readonly %a, half* no
; CHECK-NEXT: cmp r12, r3
; CHECK-NEXT: beq .LBB5_8
; CHECK-NEXT: .LBB5_6: @ %for.body.preheader11
; CHECK-NEXT: sub.w lr, r3, r12
; CHECK-NEXT: sub.w r3, r3, r12
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
; CHECK-NEXT: add.w r2, r2, r12, lsl #2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB5_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldr.16 s0, [r1]
Expand Down Expand Up @@ -1111,10 +1111,10 @@ define arm_aapcs_vfpcc void @half_half_add(half* nocapture readonly %a, half* no
; CHECK-NEXT: movs r5, #1
; CHECK-NEXT: sub.w r6, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
; CHECK-NEXT: add.w r6, r5, r6, lsr #2
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB6_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr.w r9, [r4]
Expand All @@ -1138,11 +1138,11 @@ define arm_aapcs_vfpcc void @half_half_add(half* nocapture readonly %a, half* no
; CHECK-NEXT: cmp r12, r3
; CHECK-NEXT: beq .LBB6_8
; CHECK-NEXT: .LBB6_6: @ %for.body.preheader11
; CHECK-NEXT: sub.w lr, r3, r12
; CHECK-NEXT: sub.w r3, r3, r12
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
; CHECK-NEXT: add.w r2, r2, r12, lsl #2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB6_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldr.16 s0, [r1]
Expand Down Expand Up @@ -1228,10 +1228,10 @@ define arm_aapcs_vfpcc void @half_half_sub(half* nocapture readonly %a, half* no
; CHECK-NEXT: movs r5, #1
; CHECK-NEXT: sub.w r6, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
; CHECK-NEXT: add.w r6, r5, r6, lsr #2
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB7_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr.w r9, [r4]
Expand All @@ -1255,11 +1255,11 @@ define arm_aapcs_vfpcc void @half_half_sub(half* nocapture readonly %a, half* no
; CHECK-NEXT: cmp r12, r3
; CHECK-NEXT: beq .LBB7_8
; CHECK-NEXT: .LBB7_6: @ %for.body.preheader11
; CHECK-NEXT: sub.w lr, r3, r12
; CHECK-NEXT: sub.w r3, r3, r12
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
; CHECK-NEXT: add.w r2, r2, r12, lsl #2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB7_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldr.16 s0, [r1]
Expand Down Expand Up @@ -1345,10 +1345,10 @@ define arm_aapcs_vfpcc void @half_short_mul(half* nocapture readonly %a, i16* no
; CHECK-NEXT: movs r5, #1
; CHECK-NEXT: sub.w r6, r12, #4
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: add.w lr, r5, r6, lsr #2
; CHECK-NEXT: add.w r6, r5, r6, lsr #2
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB8_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrh.u32 q0, [r5], #8
Expand Down Expand Up @@ -1377,11 +1377,11 @@ define arm_aapcs_vfpcc void @half_short_mul(half* nocapture readonly %a, i16* no
; CHECK-NEXT: cmp r12, r3
; CHECK-NEXT: beq .LBB8_8
; CHECK-NEXT: .LBB8_6: @ %for.body.preheader13
; CHECK-NEXT: sub.w lr, r3, r12
; CHECK-NEXT: sub.w r3, r3, r12
; CHECK-NEXT: add.w r0, r0, r12, lsl #1
; CHECK-NEXT: add.w r1, r1, r12, lsl #1
; CHECK-NEXT: add.w r2, r2, r12, lsl #2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB8_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrsh r3, [r1], #2
Expand Down Expand Up @@ -1476,9 +1476,9 @@ define arm_aapcs_vfpcc float @half_half_mac(half* nocapture readonly %a, half* n
; CHECK-NEXT: subs r2, #4
; CHECK-NEXT: vldr s0, .LCPI9_0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
; CHECK-NEXT: add.w r2, r3, r2, lsr #2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r2
; CHECK-NEXT: .LBB9_5: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r4, r0, r3
Expand Down Expand Up @@ -1633,9 +1633,9 @@ define arm_aapcs_vfpcc float @half_half_acc(half* nocapture readonly %a, half* n
; CHECK-NEXT: subs r2, #4
; CHECK-NEXT: vldr s0, .LCPI10_0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
; CHECK-NEXT: add.w r2, r3, r2, lsr #2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r2
; CHECK-NEXT: .LBB10_5: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r4, r0, r3
Expand Down Expand Up @@ -1790,10 +1790,10 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
; CHECK-NEXT: subs r2, #4
; CHECK-NEXT: vldr s0, .LCPI11_0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
; CHECK-NEXT: add.w r2, r3, r2, lsr #2
; CHECK-NEXT: adds r3, r1, #4
; CHECK-NEXT: dls lr, r2
; CHECK-NEXT: adds r2, r0, #4
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB11_5: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrsh.w r4, [r3, #2]
Expand Down
38 changes: 19 additions & 19 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture re
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: sub.w r12, r3, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: add.w r3, r3, r12, lsr #2
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB0_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
Expand Down Expand Up @@ -91,9 +91,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_short(i16 signext %a, i16* nocapture
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: sub.w r12, r3, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: add.w r3, r3, r12, lsr #2
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB1_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
Expand Down Expand Up @@ -167,9 +167,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_uchar(i8 zeroext %a, i8* nocapture r
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: sub.w r12, r3, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: add.w r3, r3, r12, lsr #2
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB2_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
Expand Down Expand Up @@ -243,9 +243,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_ushort(i16 signext %a, i16* nocaptur
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: sub.w r12, r3, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: add.w r3, r3, r12, lsr #2
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB3_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
Expand Down Expand Up @@ -319,9 +319,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_int(i32 %a, i32* nocapture readonly
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: sub.w r12, r3, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: add.w r3, r3, r12, lsr #2
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB4_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
Expand Down Expand Up @@ -430,10 +430,10 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly
; CHECK-NEXT: add.w r4, r3, #8
; CHECK-NEXT: subs r5, #4
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r6, r5, lsr #2
; CHECK-NEXT: add.w r6, r6, r5, lsr #2
; CHECK-NEXT: adds r5, r0, #3
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: adds r6, r1, #1
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB5_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrb r8, [r5, #-3]
Expand Down Expand Up @@ -624,8 +624,8 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_short(i16* nocapture readon
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r4, pc}
; CHECK-NEXT: .LBB6_1: @ %vector.ph
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: dlstp.32 lr, r12
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: .LBB6_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r4, #4
Expand Down Expand Up @@ -732,10 +732,10 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl
; CHECK-NEXT: add.w r4, r3, #8
; CHECK-NEXT: subs r5, #4
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r6, r5, lsr #2
; CHECK-NEXT: add.w r6, r6, r5, lsr #2
; CHECK-NEXT: adds r5, r0, #3
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: adds r6, r1, #1
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB7_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrb r8, [r5, #-3]
Expand Down Expand Up @@ -926,8 +926,8 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_ushort(i16* nocapture reado
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r4, pc}
; CHECK-NEXT: .LBB8_1: @ %vector.ph
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: dlstp.32 lr, r12
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: .LBB8_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r4, #4
Expand Down Expand Up @@ -1034,10 +1034,10 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly
; CHECK-NEXT: add.w r4, r3, #8
; CHECK-NEXT: subs r5, #4
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r6, r5, lsr #2
; CHECK-NEXT: add.w r6, r6, r5, lsr #2
; CHECK-NEXT: add.w r5, r0, #8
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: add.w r6, r1, #8
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB9_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r8, [r5, #-8]
Expand Down Expand Up @@ -1214,8 +1214,8 @@ define dso_local arm_aapcs_vfpcc void @test_v8i8_to_v8i16(i16* noalias nocapture
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB10_1: @ %vector.ph
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: dlstp.16 lr, r3
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: .LBB10_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: add.w r12, r12, #8
Expand Down
194 changes: 97 additions & 97 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
br i1 %cmp9, label %for.cond.cleanup, label %vector.ph

vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ]
Expand Down Expand Up @@ -49,7 +49,7 @@
}
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4

Expand Down Expand Up @@ -152,7 +152,7 @@ body: |
renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg
renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg
renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $r12
$lr = t2DoLoopStart renamable $r12
$r3 = tMOVr killed $r12, 14, $noreg
bb.2.vector.body:
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
br i1 %cmp11, label %for.cond.cleanup, label %vector.ph

vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
%6 = shl i32 %4, 3
%7 = sub i32 %N, %6
br label %vector.body
Expand All @@ -23,7 +23,7 @@
%lsr.iv20 = phi i8* [ %scevgep21, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <16 x i8> [ zeroinitializer, %vector.ph ], [ %13, %vector.body ]
%8 = phi i32 [ %5, %vector.ph ], [ %14, %vector.body ]
%8 = phi i32 [ %start, %vector.ph ], [ %14, %vector.body ]
%9 = phi i32 [ %N, %vector.ph ], [ %11, %vector.body ]
%lsr.iv2022 = bitcast i8* %lsr.iv20 to <16 x i8>*
%lsr.iv19 = bitcast i8* %lsr.iv to <16 x i8>*
Expand Down Expand Up @@ -54,7 +54,7 @@

declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) #1
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.start.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <16 x i1> @llvm.arm.mve.vctp8(i32) #4

Expand Down Expand Up @@ -180,7 +180,7 @@ body: |
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, renamable $r12, 35, 14, $noreg, $noreg
renamable $r3 = t2LSRri killed renamable $r12, 4, 14, $noreg, $noreg
renamable $r3 = t2SUBrs renamable $r2, killed renamable $r3, 34, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@
br i1 %cmp10, label %for.cond.cleanup, label %vector.ph

vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv19 = phi i8* [ %scevgep20, %vector.body ], [ %res, %vector.ph ]
%lsr.iv16 = phi i8* [ %scevgep17, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%6 = phi i32 [ %5, %vector.ph ], [ %11, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv1921 = bitcast i8* %lsr.iv19 to <16 x i8>*
%lsr.iv1618 = bitcast i8* %lsr.iv16 to <16 x i8>*
Expand All @@ -45,7 +45,7 @@

declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <16 x i1> @llvm.arm.mve.vctp8(i32)

Expand Down Expand Up @@ -155,7 +155,7 @@ body: |
renamable $r12 = t2BICri killed renamable $r12, 15, 14, $noreg, $noreg
renamable $r12 = t2SUBri killed renamable $r12, 16, 14, $noreg, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 35, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
br i1 %cmp9, label %exit, label %vector.ph

vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %tmp5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %tmp5, %vector.ph ]
%lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ]
%lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>*
Expand All @@ -39,7 +39,7 @@
}

declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)
Expand Down Expand Up @@ -123,7 +123,7 @@ body: |
renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg
renamable $r1 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg
renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r3
$lr = t2DoLoopStart renamable $r3
$r1 = tMOVr killed $r3, 14 /* CC::al */, $noreg
bb.2.vector.body:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@
br i1 %cmp9.not, label %for.cond.cleanup, label %vector.ph

vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv14 = phi i8* [ %scevgep15, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %14, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %15, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %15, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv13 = bitcast i8* %lsr.iv to <4 x i8>*
%lsr.iv1416 = bitcast i8* %lsr.iv14 to <4 x i8>*
Expand Down Expand Up @@ -61,14 +61,14 @@
br i1 %cmp10.not, label %for.cond.cleanup, label %vector.ph

vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv15 = phi i8* [ %scevgep16, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %14, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %15, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %15, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv14 = bitcast i8* %lsr.iv to <4 x i8>*
%lsr.iv1517 = bitcast i8* %lsr.iv15 to <4 x i8>*
Expand Down Expand Up @@ -108,14 +108,14 @@
br i1 %cmp9.not, label %for.cond.cleanup, label %vector.ph

vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv14 = phi i16* [ %scevgep15, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %14, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %15, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %15, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv13 = bitcast i16* %lsr.iv to <4 x i16>*
%lsr.iv1416 = bitcast i16* %lsr.iv14 to <4 x i16>*
Expand Down Expand Up @@ -155,14 +155,14 @@
br i1 %cmp10.not, label %for.cond.cleanup, label %vector.ph

vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv15 = phi i16* [ %scevgep16, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %14, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %15, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %15, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv14 = bitcast i16* %lsr.iv to <4 x i16>*
%lsr.iv1517 = bitcast i16* %lsr.iv15 to <4 x i16>*
Expand Down Expand Up @@ -203,14 +203,14 @@
br i1 %cmp8.not, label %for.cond.cleanup, label %vector.ph

vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv13 = phi i32* [ %scevgep14, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %13, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv12 = bitcast i32* %lsr.iv to <4 x i32>*
%lsr.iv1315 = bitcast i32* %lsr.iv13 to <4 x i32>*
Expand Down Expand Up @@ -249,14 +249,14 @@
br i1 %cmp9.not, label %for.cond.cleanup, label %vector.ph

vector.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %b, %vector.ph ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %13, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ]
%7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ]
%lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
Expand Down Expand Up @@ -286,7 +286,7 @@
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)

Expand Down Expand Up @@ -372,7 +372,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body (align 4):
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down Expand Up @@ -478,7 +478,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body (align 4):
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down Expand Up @@ -585,7 +585,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body (align 4):
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down Expand Up @@ -691,7 +691,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body (align 4):
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down Expand Up @@ -797,7 +797,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body (align 4):
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down Expand Up @@ -903,7 +903,7 @@ body: |
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.2.vector.body (align 4):
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down
170 changes: 85 additions & 85 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -69,26 +69,26 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_add_add_v8i16(i8* nocaptu
; CHECK-NEXT: .LBB1_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: adds r3, r2, #7
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: bic r3, r3, #7
; CHECK-NEXT: sub.w r12, r3, #8
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #3
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: add.w r3, r3, r12, lsr #3
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB1_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.16 r2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrbt.u16 q1, [r0], #8
; CHECK-NEXT: vldrbt.u16 q0, [r0], #8
; CHECK-NEXT: subs r2, #8
; CHECK-NEXT: vadd.i16 q1, q0, q1
; CHECK-NEXT: vadd.i16 q0, q1, q0
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrbt.u16 q2, [r1], #8
; CHECK-NEXT: vadd.i16 q1, q1, q2
; CHECK-NEXT: vadd.i16 q0, q0, q2
; CHECK-NEXT: le lr, .LBB1_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u16 r0, q0
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: sxth r0, r0
Expand Down Expand Up @@ -142,25 +142,25 @@ define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_sub_add_v16i8(i8* nocaptur
; CHECK-NEXT: .LBB2_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: add.w r3, r2, #15
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: bic r3, r3, #15
; CHECK-NEXT: sub.w r12, r3, #16
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #4
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: add.w r3, r3, r12, lsr #4
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB2_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.8 r2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrbt.u8 q1, [r1], #16
; CHECK-NEXT: vldrbt.u8 q0, [r1], #16
; CHECK-NEXT: vldrbt.u8 q2, [r0], #16
; CHECK-NEXT: subs r2, #16
; CHECK-NEXT: vsub.i8 q1, q2, q1
; CHECK-NEXT: vadd.i8 q1, q1, q0
; CHECK-NEXT: vsub.i8 q0, q2, q0
; CHECK-NEXT: vadd.i8 q0, q0, q1
; CHECK-NEXT: le lr, .LBB2_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u8 r0, q0
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: uxtb r0, r0
Expand Down Expand Up @@ -212,25 +212,25 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_sub_add_v8i16(i8* nocaptu
; CHECK-NEXT: .LBB3_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: adds r3, r2, #7
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: bic r3, r3, #7
; CHECK-NEXT: sub.w r12, r3, #8
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #3
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: add.w r3, r3, r12, lsr #3
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB3_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.16 r2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrbt.u16 q1, [r0], #8
; CHECK-NEXT: vldrbt.u16 q0, [r0], #8
; CHECK-NEXT: vldrbt.u16 q2, [r1], #8
; CHECK-NEXT: subs r2, #8
; CHECK-NEXT: vsub.i16 q1, q2, q1
; CHECK-NEXT: vadd.i16 q1, q1, q0
; CHECK-NEXT: vsub.i16 q0, q2, q0
; CHECK-NEXT: vadd.i16 q0, q0, q1
; CHECK-NEXT: le lr, .LBB3_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u16 r0, q0
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: sxth r0, r0
Expand Down Expand Up @@ -284,25 +284,25 @@ define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_mul_add_v16i8(i8* nocaptur
; CHECK-NEXT: .LBB4_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: add.w r3, r2, #15
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: bic r3, r3, #15
; CHECK-NEXT: sub.w r12, r3, #16
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #4
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: add.w r3, r3, r12, lsr #4
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB4_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.8 r2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrbt.u8 q1, [r0], #16
; CHECK-NEXT: vldrbt.u8 q0, [r0], #16
; CHECK-NEXT: vldrbt.u8 q2, [r1], #16
; CHECK-NEXT: subs r2, #16
; CHECK-NEXT: vmul.i8 q1, q2, q1
; CHECK-NEXT: vadd.i8 q1, q1, q0
; CHECK-NEXT: vmul.i8 q0, q2, q0
; CHECK-NEXT: vadd.i8 q0, q0, q1
; CHECK-NEXT: le lr, .LBB4_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u8 r0, q0
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: uxtb r0, r0
Expand Down Expand Up @@ -354,25 +354,25 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_mul_add_v8i16(i8* nocaptu
; CHECK-NEXT: .LBB5_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: adds r3, r2, #7
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: bic r3, r3, #7
; CHECK-NEXT: sub.w r12, r3, #8
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #3
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: add.w r3, r3, r12, lsr #3
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB5_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.16 r2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrbt.u16 q1, [r0], #8
; CHECK-NEXT: vldrbt.u16 q0, [r0], #8
; CHECK-NEXT: vldrbt.u16 q2, [r1], #8
; CHECK-NEXT: subs r2, #8
; CHECK-NEXT: vmul.i16 q1, q2, q1
; CHECK-NEXT: vadd.i16 q1, q1, q0
; CHECK-NEXT: vmul.i16 q0, q2, q0
; CHECK-NEXT: vadd.i16 q0, q0, q1
; CHECK-NEXT: le lr, .LBB5_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u16 r0, q0
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: sxth r0, r0
Expand Down Expand Up @@ -423,36 +423,36 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture read
; CHECK-NEXT: beq .LBB6_8
; CHECK-NEXT: @ %bb.1: @ %vector.ph
; CHECK-NEXT: adds r3, r2, #3
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: subs r6, r3, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: add.w lr, r3, r6, lsr #2
; CHECK-NEXT: add.w r3, r3, r6, lsr #2
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: mov r3, r2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB6_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r3
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrbt.u32 q1, [r4], #4
; CHECK-NEXT: vldrbt.u32 q0, [r4], #4
; CHECK-NEXT: vldrbt.u32 q2, [r5], #4
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: vmul.i32 q1, q2, q1
; CHECK-NEXT: vadd.i32 q1, q1, q0
; CHECK-NEXT: vmul.i32 q0, q2, q0
; CHECK-NEXT: vadd.i32 q0, q0, q1
; CHECK-NEXT: le lr, .LBB6_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u32 r12, q0
; CHECK-NEXT: cbz r2, .LBB6_7
; CHECK-NEXT: @ %bb.4: @ %vector.ph47
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r6, lsr #2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: vdup.32 q0, r3
; CHECK-NEXT: add.w r3, r3, r6, lsr #2
; CHECK-NEXT: movs r6, #0
; CHECK-NEXT: vdup.32 q0, r6
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: vmov.32 q0[0], r12
; CHECK-NEXT: .LBB6_5: @ %vector.body46
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
Expand Down Expand Up @@ -550,32 +550,32 @@ define dso_local arm_aapcs_vfpcc void @two_reductions_mul_add_v8i16(i8* nocaptur
; CHECK-NEXT: cbz r2, .LBB7_4
; CHECK-NEXT: @ %bb.1: @ %vector.ph
; CHECK-NEXT: adds r3, r2, #7
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: bic r3, r3, #7
; CHECK-NEXT: movs r4, #1
; CHECK-NEXT: bic r3, r3, #7
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: subs r3, #8
; CHECK-NEXT: vmov q3, q1
; CHECK-NEXT: add.w lr, r4, r3, lsr #3
; CHECK-NEXT: mov r3, r0
; CHECK-NEXT: vmov q3, q0
; CHECK-NEXT: add.w r3, r4, r3, lsr #3
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: mov r3, r0
; CHECK-NEXT: .LBB7_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.16 r2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov q1, q0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrbt.u16 q1, [r3], #8
; CHECK-NEXT: vldrbt.u16 q0, [r3], #8
; CHECK-NEXT: vldrbt.u16 q4, [r4], #8
; CHECK-NEXT: vmov q2, q3
; CHECK-NEXT: vsub.i16 q3, q4, q1
; CHECK-NEXT: vmul.i16 q1, q4, q1
; CHECK-NEXT: vsub.i16 q3, q4, q0
; CHECK-NEXT: vmul.i16 q0, q4, q0
; CHECK-NEXT: subs r2, #8
; CHECK-NEXT: vadd.i16 q3, q3, q2
; CHECK-NEXT: vadd.i16 q1, q1, q0
; CHECK-NEXT: vadd.i16 q0, q0, q1
; CHECK-NEXT: le lr, .LBB7_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q2, q3, q2
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u16 r4, q2
; CHECK-NEXT: vaddv.u16 r2, q0
; CHECK-NEXT: b .LBB7_5
Expand Down Expand Up @@ -643,40 +643,40 @@ define i32 @wrongop(%struct.date* nocapture readonly %pd) {
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov r1, r0
; CHECK-NEXT: movw r12, #47184
; CHECK-NEXT: movw r3, #23593
; CHECK-NEXT: ldrd r2, lr, [r1, #4]
; CHECK-NEXT: movw r1, #23593
; CHECK-NEXT: movt r12, #1310
; CHECK-NEXT: movt r3, #49807
; CHECK-NEXT: mla r3, lr, r3, r12
; CHECK-NEXT: movw r1, #55051
; CHECK-NEXT: movt r1, #49807
; CHECK-NEXT: mla r1, lr, r1, r12
; CHECK-NEXT: movw r3, #55051
; CHECK-NEXT: movw r4, #23593
; CHECK-NEXT: movt r1, #163
; CHECK-NEXT: movt r3, #163
; CHECK-NEXT: ldr r0, [r0]
; CHECK-NEXT: movt r4, #655
; CHECK-NEXT: ror.w r12, r3, #4
; CHECK-NEXT: cmp r12, r1
; CHECK-NEXT: cset r1, lo
; CHECK-NEXT: ror.w r3, r3, #2
; CHECK-NEXT: ror.w r12, r1, #4
; CHECK-NEXT: cmp r12, r3
; CHECK-NEXT: cset r3, lo
; CHECK-NEXT: ror.w r1, r1, #2
; CHECK-NEXT: mov.w r12, #1
; CHECK-NEXT: cmp r3, r4
; CHECK-NEXT: csel r3, r1, r12, lo
; CHECK-NEXT: cmp r1, r4
; CHECK-NEXT: csel r1, r3, r12, lo
; CHECK-NEXT: lsls.w r4, lr, #30
; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: csel r3, r3, r1, ne
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: it lt
; CHECK-NEXT: poplt {r4, pc}
; CHECK-NEXT: .LBB8_1: @ %vector.ph
; CHECK-NEXT: adds r3, r2, #3
; CHECK-NEXT: movs r4, #52
; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: add.w lr, r12, r3, lsr #2
; CHECK-NEXT: movw r3, :lower16:days
; CHECK-NEXT: movt r3, :upper16:days
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: mla r1, r1, r4, r3
; CHECK-NEXT: adds r1, r2, #3
; CHECK-NEXT: bic r1, r1, #3
; CHECK-NEXT: subs r1, #4
; CHECK-NEXT: add.w r4, r12, r1, lsr #2
; CHECK-NEXT: movw r12, :lower16:days
; CHECK-NEXT: movt r12, :upper16:days
; CHECK-NEXT: movs r1, #52
; CHECK-NEXT: mla r1, r3, r1, r12
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vdup.32 q0, r3
; CHECK-NEXT: dls lr, r4
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: .LBB8_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ define void @dont_remat_predicated_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32*
; CHECK-NEXT: vmov.i32 q2, #0x1
; CHECK-NEXT: add.w lr, r5, #3
; CHECK-NEXT: movs r5, #1
; CHECK-NEXT: add.w lr, r5, lr, lsr #2
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: add.w r5, r5, lr, lsr #2
; CHECK-NEXT: dls lr, r5
; CHECK-NEXT: .LBB1_1: @ %bb6
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r12
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/remove-elem-moves.mir
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@
%ind.end17 = getelementptr float, float* %pDst, i32 %n.vec
%scevgep9 = getelementptr float, float* %pDst, i32 -4
%scevgep14 = getelementptr float, float* %pSrc, i32 -4
call void @llvm.set.loop.iterations.i32(i32 %4)
%start1 = call i32 @llvm.start.loop.iterations.i32(i32 %4)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv15 = phi float* [ %scevgep16, %vector.body ], [ %scevgep14, %vector.ph ]
%lsr.iv10 = phi float* [ %scevgep11, %vector.body ], [ %scevgep9, %vector.ph ]
%5 = phi i32 [ %4, %vector.ph ], [ %7, %vector.body ]
%5 = phi i32 [ %start1, %vector.ph ], [ %7, %vector.body ]
%lsr.iv1517 = bitcast float* %lsr.iv15 to <4 x float>*
%lsr.iv1012 = bitcast float* %lsr.iv10 to <4 x float>*
%scevgep18 = getelementptr <4 x float>, <4 x float>* %lsr.iv1517, i32 1
Expand All @@ -61,13 +61,13 @@
%pDst.addr.06.ph = phi float* [ %pDst, %vector.memcheck ], [ %pDst, %while.body.preheader ], [ %ind.end17, %middle.block ]
%scevgep1 = getelementptr float, float* %pSrc.addr.07.ph, i32 -1
%scevgep4 = getelementptr float, float* %pDst.addr.06.ph, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %blkCnt.08.ph)
%start2 = call i32 @llvm.start.loop.iterations.i32(i32 %blkCnt.08.ph)
br label %while.body

while.body: ; preds = %while.body, %while.body.preheader19
%lsr.iv5 = phi float* [ %scevgep6, %while.body ], [ %scevgep4, %while.body.preheader19 ]
%lsr.iv = phi float* [ %scevgep2, %while.body ], [ %scevgep1, %while.body.preheader19 ]
%9 = phi i32 [ %blkCnt.08.ph, %while.body.preheader19 ], [ %12, %while.body ]
%9 = phi i32 [ %start2, %while.body.preheader19 ], [ %12, %while.body ]
%scevgep3 = getelementptr float, float* %lsr.iv, i32 1
%scevgep7 = getelementptr float, float* %lsr.iv5, i32 1
%10 = load float, float* %scevgep3, align 4
Expand All @@ -84,7 +84,7 @@
}
declare float @llvm.fabs.f32(float)
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)

...
Expand Down Expand Up @@ -262,7 +262,7 @@ body: |
renamable $r7, dead $cpsr = tSUBrr renamable $r2, renamable $r4, 14, $noreg
renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg
renamable $r12 = t2ADDrs renamable $r0, renamable $r4, 18, 14, $noreg, $noreg
t2DoLoopStart renamable $r3
$lr = t2DoLoopStart renamable $r3
renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 16, 14, $noreg
$r5 = tMOVr killed $r3, 14, $noreg
renamable $r3 = t2ADDrs renamable $r1, renamable $r4, 18, 14, $noreg, $noreg
Expand Down Expand Up @@ -305,7 +305,7 @@ body: |
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r3, 4, 14, $noreg
renamable $r1 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
bb.8.while.body:
successors: %bb.8(0x7c000000), %bb.9(0x04000000)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
br i1 %cmp6, label %while.end, label %while.body.preheader

while.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %n)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
br label %while.body

while.body: ; preds = %while.body, %while.body.preheader
%res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
%0 = phi i32 [ %start, %while.body.preheader ], [ %1, %while.body ]
%call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)()
%add = add nsw i32 %call, %res.07
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
Expand All @@ -33,7 +33,7 @@

declare i32 @bar(...) local_unnamed_addr #0

declare void @llvm.set.loop.iterations.i32(i32) #1
declare i32 @llvm.start.loop.iterations.i32(i32) #1
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1

attributes #0 = { "target-features"="+mve.fp" }
Expand Down Expand Up @@ -109,7 +109,7 @@ body: |
$lr = tMOVr $r0, 14, $noreg
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
t2DoLoopStart killed $r0
$lr = t2DoLoopStart killed $r0
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
br i1 %cmp6, label %while.end, label %while.body.preheader

while.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %n)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
br label %while.body

while.body: ; preds = %while.body, %while.body.preheader
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
%0 = phi i32 [ %start, %while.body.preheader ], [ %1, %while.body ]
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
%add = add i32 %1, 0
%2 = icmp ne i32 %1, 0
Expand All @@ -29,7 +29,7 @@
ret i32 %res.0.lcssa
}

declare void @llvm.set.loop.iterations.i32(i32) #1
declare i32 @llvm.start.loop.iterations.i32(i32) #1
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1

attributes #0 = { "target-features"="+mve.fp" }
Expand Down Expand Up @@ -96,7 +96,7 @@ body: |
liveins: $r0
$lr = tMOVr $r0, 14, $noreg
t2DoLoopStart killed $r0
$lr = t2DoLoopStart killed $r0
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
br i1 %cmp6, label %while.end, label %while.body.preheader

while.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %n)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
br label %while.body

while.body: ; preds = %while.body, %while.body.preheader
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
%0 = phi i32 [ %start, %while.body.preheader ], [ %1, %while.body ]
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
%add = add i32 %1, 2
%2 = icmp ne i32 %1, 0
Expand All @@ -30,7 +30,7 @@
}

; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #1
declare i32 @llvm.start.loop.iterations.i32(i32) #1

; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
Expand Down Expand Up @@ -102,7 +102,7 @@ body: |
liveins: $r0
$lr = tMOVr $r0, 14, $noreg
t2DoLoopStart killed $r0
$lr = t2DoLoopStart killed $r0
bb.2.while.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
%gap.057 = sdiv i32 %gap.057.in, 2
%cmp252 = icmp slt i32 %gap.057, %n
%tmp = sub i32 %n, %gap.057
call void @llvm.set.loop.iterations.i32(i32 %tmp)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp)
br i1 %cmp252, label %for.cond4.preheader.preheader, label %for.cond.loopexit

for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
Expand All @@ -44,7 +44,7 @@
%lsr.iv2 = phi i32* [ %scevgep3, %for.inc16 ], [ %scevgep1, %for.cond4.preheader.preheader ]
%lsr.iv = phi i32* [ %v, %for.cond4.preheader.preheader ], [ %scevgep, %for.inc16 ]
%i.053 = phi i32 [ %inc, %for.inc16 ], [ %gap.057, %for.cond4.preheader.preheader ]
%tmp8 = phi i32 [ %tmp, %for.cond4.preheader.preheader ], [ %tmp16, %for.inc16 ]
%tmp8 = phi i32 [ %start, %for.cond4.preheader.preheader ], [ %tmp16, %for.inc16 ]
%j.048 = sub nsw i32 %i.053, %gap.057
%cmp549 = icmp sgt i32 %j.048, -1
br i1 %cmp549, label %land.rhs.preheader, label %for.inc16
Expand Down Expand Up @@ -93,7 +93,7 @@
}

; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.start.loop.iterations.i32(i32) #0

; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
Expand Down Expand Up @@ -208,7 +208,7 @@ body: |
renamable $lr = t2SUBrs renamable $r1, renamable $r2, 9, 14, $noreg, $noreg
renamable $r9 = t2ASRri renamable $r2, 1, 14, $noreg, $noreg
t2CMPrs renamable $r1, killed renamable $r2, 9, 14, $noreg, implicit-def $cpsr
t2DoLoopStart renamable $lr
$lr = t2DoLoopStart renamable $lr
tBcc %bb.2, 13, killed $cpsr
bb.4.for.cond4.preheader.preheader:
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/safe-def-no-mov.mir
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
entry:
%scevgep = getelementptr i32, i32* %q, i32 -1
%scevgep3 = getelementptr i32, i32* %p, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %n)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
br label %preheader

preheader:
Expand All @@ -20,7 +20,7 @@
while.body: ; preds = %while.body, %entry
%lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %preheader ]
%lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %preheader ]
%0 = phi i32 [ %n, %preheader ], [ %2, %while.body ]
%0 = phi i32 [ %start, %preheader ], [ %2, %while.body ]
%scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
%scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
%1 = load i32, i32* %scevgep6, align 4
Expand All @@ -35,7 +35,7 @@
ret i32 0
}

declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.start.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0

attributes #0 = { noduplicate nounwind }
Expand Down Expand Up @@ -120,7 +120,7 @@ body: |
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
t2DoLoopStart $r0
$lr = t2DoLoopStart $r0
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg
renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/safe-retaining.mir
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
br i1 %cmp, label %exit, label %loop.ph

loop.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %iters)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %iters)
br label %loop.body

loop.body: ; preds = %loop.body, %loop.ph
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %iters, %loop.ph ]
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %start, %loop.ph ]
%count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ]
%addr.a = phi <4 x i32>* [ %a, %loop.ph ], [ %addr.a.next, %loop.body ]
%addr.b = phi <4 x i32>* [ %b, %loop.ph ], [ %addr.b.next, %loop.body ]
Expand Down Expand Up @@ -43,11 +43,11 @@
br i1 %cmp, label %exit, label %loop.ph

loop.ph: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %iters)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %iters)
br label %loop.body

loop.body: ; preds = %loop.body, %loop.ph
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %iters, %loop.ph ]
%lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %start, %loop.ph ]
%count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ]
%addr.a = phi <8 x i16>* [ %a, %loop.ph ], [ %addr.a.next, %loop.body ]
%addr.b = phi <8 x i16>* [ %b, %loop.ph ], [ %addr.b.next, %loop.body ]
Expand All @@ -72,7 +72,7 @@
ret void
}

declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
Expand Down Expand Up @@ -160,7 +160,7 @@ body: |
liveins: $r0, $r1, $r2, $r3, $r4, $lr
renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r4
$lr = t2DoLoopStart renamable $r4
$r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg
bb.2.loop.body:
Expand Down Expand Up @@ -261,7 +261,7 @@ body: |
liveins: $r0, $r1, $r2, $r3, $r4, $lr
renamable $r12 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r12
$lr = t2DoLoopStart renamable $r12
$r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg
bb.2.loop.body:
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,29 @@ define arm_aapcs_vfpcc void @test(i16* noalias nocapture readonly %off, i16* noa
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB0_3 Depth 2
; CHECK-NEXT: @ Child Loop BB0_5 Depth 2
; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: movs r6, #0
; CHECK-NEXT: .LBB0_3: @ %for.body4.us
; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldrh.w r6, [r0, r5, lsl #1]
; CHECK-NEXT: ldrh.w r7, [r1, r5, lsl #1]
; CHECK-NEXT: add r6, r7
; CHECK-NEXT: strh.w r6, [r4, r5, lsl #1]
; CHECK-NEXT: adds r5, #1
; CHECK-NEXT: ldrh.w r5, [r0, r6, lsl #1]
; CHECK-NEXT: ldrh.w r7, [r1, r6, lsl #1]
; CHECK-NEXT: add r5, r7
; CHECK-NEXT: strh.w r5, [r4, r6, lsl #1]
; CHECK-NEXT: adds r6, #1
; CHECK-NEXT: le lr, .LBB0_3
; CHECK-NEXT: @ %bb.4: @ %for.body15.us.preheader
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: movs r6, #0
; CHECK-NEXT: .LBB0_5: @ %for.body15.us
; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldrh.w r7, [r0, r5, lsl #1]
; CHECK-NEXT: ldrh.w r6, [r1, r5, lsl #1]
; CHECK-NEXT: add r6, r7
; CHECK-NEXT: strh.w r6, [r2, r5, lsl #1]
; CHECK-NEXT: adds r5, #1
; CHECK-NEXT: ldrh.w r7, [r0, r6, lsl #1]
; CHECK-NEXT: ldrh.w r5, [r1, r6, lsl #1]
; CHECK-NEXT: add r5, r7
; CHECK-NEXT: strh.w r5, [r2, r6, lsl #1]
; CHECK-NEXT: adds r6, #1
; CHECK-NEXT: le lr, .LBB0_5
; CHECK-NEXT: @ %bb.6: @ %for.cond.cleanup14.us
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
%scevgep = getelementptr i32, i32* %a, i32 -1
%scevgep4 = getelementptr i32, i32* %c, i32 -1
%scevgep8 = getelementptr i32, i32* %b, i32 -1
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
br label %for.body

for.cond.cleanup: ; preds = %for.body, %entry
Expand All @@ -26,7 +26,7 @@
%lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
%lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
%lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
%0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ]
%0 = phi i32 [ %start, %for.body.preheader ], [ %3, %for.body ]
%size = call i32 @llvm.arm.space(i32 4070, i32 undef)
%scevgep3 = getelementptr i32, i32* %lsr.iv9, i32 1
%1 = load i32, i32* %scevgep3, align 4
Expand All @@ -47,7 +47,7 @@
declare i32 @llvm.arm.space(i32 immarg, i32) #0

; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #1
declare i32 @llvm.start.loop.iterations.i32(i32) #1

; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
Expand Down Expand Up @@ -155,7 +155,7 @@ body: |
renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg
renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg
$lr = tMOVr $r3, 14, $noreg
t2DoLoopStart killed $r3
$lr = t2DoLoopStart killed $r3
bb.2.for.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@

vector.ph: ; preds = %entry
%7 = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 %0, i32 0, !dbg !32
call void @llvm.set.loop.iterations.i32(i32 %6), !dbg !32
%start = call i32 @llvm.start.loop.iterations.i32(i32 %6), !dbg !32
%8 = shl i32 %5, 2, !dbg !32
%9 = sub i32 %N, %8, !dbg !32
br label %vector.body, !dbg !32

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %b, %vector.ph ], !dbg !33
%vec.phi = phi <4 x i32> [ %7, %vector.ph ], [ %15, %vector.body ]
%10 = phi i32 [ %6, %vector.ph ], [ %16, %vector.body ]
%10 = phi i32 [ %start, %vector.ph ], [ %16, %vector.body ]
%11 = phi i32 [ %N, %vector.ph ], [ %13, %vector.body ]
%lsr.iv14 = bitcast i16* %lsr.iv to <4 x i16>*
%12 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %11), !dbg !34
Expand Down Expand Up @@ -59,7 +59,7 @@
declare void @llvm.dbg.value(metadata, metadata, metadata)
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)

Expand Down Expand Up @@ -261,7 +261,7 @@ body: |
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, renamable $r3, 19, 14, $noreg, $noreg, debug-location !32
renamable $r3, dead $cpsr = tLSRri killed renamable $r3, 2, 14, $noreg, debug-location !32
renamable $r3 = t2SUBrs renamable $r2, killed renamable $r3, 18, 14, $noreg, $noreg, debug-location !32
t2DoLoopStart renamable $lr, debug-location !32
$lr = t2DoLoopStart renamable $lr, debug-location !32
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/switch.mir
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
br i1 %cmp11, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader: ; preds = %entry
call void @llvm.set.loop.iterations.i32(i32 %N)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
br label %for.body

for.cond.cleanup: ; preds = %for.inc, %entry
Expand All @@ -30,7 +30,7 @@
%lsr.iv1 = phi i8* [ %c, %for.body.preheader ], [ %scevgep, %for.inc ]
%spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %for.body.preheader ]
%found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %for.body.preheader ]
%0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.inc ]
%0 = phi i32 [ %start, %for.body.preheader ], [ %3, %for.inc ]
%1 = load i8, i8* %lsr.iv1, align 1
%2 = zext i8 %1 to i32
switch i32 %2, label %for.inc [
Expand Down Expand Up @@ -58,7 +58,7 @@
}

; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #0
declare i32 @llvm.start.loop.iterations.i32(i32) #0

; Function Attrs: noduplicate nounwind
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
Expand Down Expand Up @@ -130,7 +130,7 @@ body: |
liveins: $r0, $r1
$lr = tMOVr $r1, 14, $noreg
t2DoLoopStart killed $r1
$lr = t2DoLoopStart killed $r1
renamable $r1, dead $cpsr = tMOVi8 0, 14, $noreg
renamable $r12 = t2MOVi 1, 14, $noreg, $noreg
renamable $r2, dead $cpsr = tMOVi8 0, 14, $noreg
Expand Down
42 changes: 21 additions & 21 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <16 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <16 x i32> %broadcast.splatinsert10, <16 x i32> undef, <16 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <16 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer
%induction = or <16 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
Expand Down Expand Up @@ -82,12 +82,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
%induction = add <8 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
Expand Down Expand Up @@ -138,12 +138,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = or <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
Expand Down Expand Up @@ -193,12 +193,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
Expand Down Expand Up @@ -252,12 +252,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
Expand Down Expand Up @@ -311,12 +311,12 @@ vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
%broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
call void @llvm.set.loop.iterations.i32(i32 %tmp13)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ]
%tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ]
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
Expand Down Expand Up @@ -374,15 +374,15 @@ vector.ph:
%scevgep = getelementptr i32, i32* %A, i32 8
%scevgep30 = getelementptr i32, i32* %C, i32 8
%scevgep37 = getelementptr i32, i32* %B, i32 8
call void @llvm.set.loop.iterations.i32(i32 %v5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %v5)
br label %vector.body

vector.body:
%lsr.iv38 = phi i32* [ %scevgep39, %vector.body ], [ %scevgep37, %vector.ph ]
%lsr.iv31 = phi i32* [ %scevgep32, %vector.body ], [ %scevgep30, %vector.ph ]
%lsr.iv = phi i32* [ %scevgep25, %vector.body ], [ %scevgep, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %v14, %vector.body ]
%v6 = phi i32 [ %v5, %vector.ph ], [ %v15, %vector.body ]
%v6 = phi i32 [ %start, %vector.ph ], [ %v15, %vector.body ]
%lsr.iv3840 = bitcast i32* %lsr.iv38 to <4 x i32>*
%lsr.iv3133 = bitcast i32* %lsr.iv31 to <4 x i32>*
%lsr.iv26 = bitcast i32* %lsr.iv to <4 x i32>*
Expand Down Expand Up @@ -447,15 +447,15 @@ entry:
br i1 %cmp8, label %vector.ph, label %for.cond.cleanup

vector.ph:
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
%lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %8, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %8, %vector.body ]
%lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
%lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>*
Expand Down Expand Up @@ -496,15 +496,15 @@ entry:

vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ]
%lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ]
%lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ]
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%6 = phi i32 [ %5, %vector.ph ], [ %8, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %8, %vector.body ]

%lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
Expand Down Expand Up @@ -547,7 +547,7 @@ entry:

vector.ph: ; preds = %entry
%trip.count.minus.1 = add i32 %N, -1
call void @llvm.set.loop.iterations.i32(i32 %5)
%start = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
Expand All @@ -558,7 +558,7 @@ vector.body: ; preds = %vector.body, %vecto
; AddRec base is not 0:
%index = phi i32 [ 1, %vector.ph ], [ %index.next, %vector.body ]

%6 = phi i32 [ %5, %vector.ph ], [ %8, %vector.body ]
%6 = phi i32 [ %start, %vector.ph ], [ %8, %vector.body ]
%lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>*
%lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>*
%lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>*
Expand Down Expand Up @@ -589,7 +589,7 @@ declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i
declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32 immarg, <2 x i1>)
declare <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>*, i32 immarg, <2 x i1>, <2 x i64>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
Expand Down
Loading