56 changes: 32 additions & 24 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture re
; CHECK-NEXT: itt eq
; CHECK-NEXT: moveq r0, #0
; CHECK-NEXT: bxeq lr
; CHECK-NEXT: .LBB0_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: adds r3, r2, #3
; CHECK-NEXT: vmov.i32 q0, #0x0
Expand All @@ -17,7 +18,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture re
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB0_1: @ %vector.body
; CHECK-NEXT: .LBB0_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
; CHECK-NEXT: adds r3, #4
Expand All @@ -26,8 +27,8 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture re
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrbt.u32 q2, [r1], #4
; CHECK-NEXT: vmla.u32 q0, q2, r0
; CHECK-NEXT: le lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %middle.block
; CHECK-NEXT: le lr, .LBB0_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u32 r0, q0
; CHECK-NEXT: pop {r7, pc}
Expand Down Expand Up @@ -83,6 +84,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_short(i16 signext %a, i16* nocapture
; CHECK-NEXT: itt eq
; CHECK-NEXT: moveq r0, #0
; CHECK-NEXT: bxeq lr
; CHECK-NEXT: .LBB1_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: adds r3, r2, #3
; CHECK-NEXT: vmov.i32 q0, #0x0
Expand All @@ -92,7 +94,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_short(i16 signext %a, i16* nocapture
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB1_1: @ %vector.body
; CHECK-NEXT: .LBB1_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
; CHECK-NEXT: adds r3, #4
Expand All @@ -101,8 +103,8 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_short(i16 signext %a, i16* nocapture
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrht.s32 q2, [r1], #8
; CHECK-NEXT: vmla.u32 q0, q2, r0
; CHECK-NEXT: le lr, .LBB1_1
; CHECK-NEXT: @ %bb.2: @ %middle.block
; CHECK-NEXT: le lr, .LBB1_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u32 r0, q0
; CHECK-NEXT: pop {r7, pc}
Expand Down Expand Up @@ -158,6 +160,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_uchar(i8 zeroext %a, i8* nocapture r
; CHECK-NEXT: itt eq
; CHECK-NEXT: moveq r0, #0
; CHECK-NEXT: bxeq lr
; CHECK-NEXT: .LBB2_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: adds r3, r2, #3
; CHECK-NEXT: vmov.i32 q0, #0x0
Expand All @@ -167,7 +170,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_uchar(i8 zeroext %a, i8* nocapture r
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB2_1: @ %vector.body
; CHECK-NEXT: .LBB2_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
; CHECK-NEXT: adds r3, #4
Expand All @@ -176,8 +179,8 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_uchar(i8 zeroext %a, i8* nocapture r
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrbt.u32 q2, [r1], #4
; CHECK-NEXT: vmla.u32 q0, q2, r0
; CHECK-NEXT: le lr, .LBB2_1
; CHECK-NEXT: @ %bb.2: @ %middle.block
; CHECK-NEXT: le lr, .LBB2_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u32 r0, q0
; CHECK-NEXT: pop {r7, pc}
Expand Down Expand Up @@ -233,6 +236,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_ushort(i16 signext %a, i16* nocaptur
; CHECK-NEXT: itt eq
; CHECK-NEXT: moveq r0, #0
; CHECK-NEXT: bxeq lr
; CHECK-NEXT: .LBB3_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: adds r3, r2, #3
; CHECK-NEXT: vmov.i32 q0, #0x0
Expand All @@ -242,7 +246,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_ushort(i16 signext %a, i16* nocaptur
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB3_1: @ %vector.body
; CHECK-NEXT: .LBB3_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
; CHECK-NEXT: adds r3, #4
Expand All @@ -251,8 +255,8 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_ushort(i16 signext %a, i16* nocaptur
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrht.u32 q2, [r1], #8
; CHECK-NEXT: vmla.u32 q0, q2, r0
; CHECK-NEXT: le lr, .LBB3_1
; CHECK-NEXT: @ %bb.2: @ %middle.block
; CHECK-NEXT: le lr, .LBB3_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u32 r0, q0
; CHECK-NEXT: pop {r7, pc}
Expand Down Expand Up @@ -308,6 +312,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_int(i32 %a, i32* nocapture readonly
; CHECK-NEXT: itt eq
; CHECK-NEXT: moveq r0, #0
; CHECK-NEXT: bxeq lr
; CHECK-NEXT: .LBB4_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: adds r3, r2, #3
; CHECK-NEXT: vmov.i32 q0, #0x0
Expand All @@ -317,7 +322,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_int(i32 %a, i32* nocapture readonly
; CHECK-NEXT: add.w lr, r3, r12, lsr #2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB4_1: @ %vector.body
; CHECK-NEXT: .LBB4_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
; CHECK-NEXT: adds r3, #4
Expand All @@ -326,8 +331,8 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_int(i32 %a, i32* nocapture readonly
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrwt.u32 q2, [r1], #16
; CHECK-NEXT: vmla.u32 q0, q2, r0
; CHECK-NEXT: le lr, .LBB4_1
; CHECK-NEXT: @ %bb.2: @ %middle.block
; CHECK-NEXT: le lr, .LBB4_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vaddv.u32 r0, q0
; CHECK-NEXT: pop {r7, pc}
Expand Down Expand Up @@ -618,17 +623,18 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_short(i16* nocapture readon
; CHECK-NEXT: cmp.w r12, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r4, pc}
; CHECK-NEXT: .LBB6_1: @ %vector.ph
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: dlstp.32 lr, r12
; CHECK-NEXT: .LBB6_1: @ %vector.body
; CHECK-NEXT: .LBB6_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r4, #4
; CHECK-NEXT: vldrh.s32 q0, [r0], #8
; CHECK-NEXT: vldrh.s32 q1, [r1], #8
; CHECK-NEXT: vmlas.u32 q1, q0, r2
; CHECK-NEXT: vstrw.32 q1, [r3], #16
; CHECK-NEXT: letp lr, .LBB6_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: letp lr, .LBB6_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT: pop {r4, pc}
entry:
%cmp10 = icmp eq i32 %N, 0
Expand Down Expand Up @@ -919,17 +925,18 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_ushort(i16* nocapture reado
; CHECK-NEXT: cmp.w r12, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r4, pc}
; CHECK-NEXT: .LBB8_1: @ %vector.ph
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: dlstp.32 lr, r12
; CHECK-NEXT: .LBB8_1: @ %vector.body
; CHECK-NEXT: .LBB8_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r4, #4
; CHECK-NEXT: vldrh.u32 q0, [r0], #8
; CHECK-NEXT: vldrh.u32 q1, [r1], #8
; CHECK-NEXT: vmlas.u32 q1, q0, r2
; CHECK-NEXT: vstrw.32 q1, [r3], #16
; CHECK-NEXT: letp lr, .LBB8_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: letp lr, .LBB8_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT: pop {r4, pc}
entry:
%cmp10 = icmp eq i32 %N, 0
Expand Down Expand Up @@ -1206,17 +1213,18 @@ define dso_local arm_aapcs_vfpcc void @test_v8i8_to_v8i16(i16* noalias nocapture
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB10_1: @ %vector.ph
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: dlstp.16 lr, r3
; CHECK-NEXT: .LBB10_1: @ %vector.body
; CHECK-NEXT: .LBB10_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: add.w r12, r12, #8
; CHECK-NEXT: vldrb.u16 q0, [r1], #8
; CHECK-NEXT: vldrb.u16 q1, [r2], #8
; CHECK-NEXT: vmul.i16 q0, q1, q0
; CHECK-NEXT: vstrh.16 q0, [r0], #16
; CHECK-NEXT: letp lr, .LBB10_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: letp lr, .LBB10_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
entry:
%cmp10 = icmp eq i32 %N, 0
Expand Down
173 changes: 89 additions & 84 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir

Large diffs are not rendered by default.

226 changes: 153 additions & 73 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir
Original file line number Diff line number Diff line change
@@ -1,44 +1,40 @@
# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s
# CHECK-NOT: t2LE
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-cp-islands %s -o - --verify-machineinstrs | FileCheck %s

--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-unknown-unknown"

%struct.head_s = type { %struct.head_s*, %struct.data_s* }
%struct.data_s = type { i16, i16 }

; Function Attrs: norecurse nounwind readonly
define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr #0 {

define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr {
entry:
%idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1
%tmp = load i16, i16* %idx, align 2
%cmp = icmp sgt i16 %tmp, -1
br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader

while.cond9.preheader: ; preds = %entry
%0 = icmp eq %struct.head_s* %list, null
br i1 %0, label %return, label %land.rhs11.lr.ph

land.rhs11.lr.ph: ; preds = %while.cond9.preheader
%data16143 = bitcast %struct.data_s* %info to i16*
%tmp1 = load i16, i16* %data16143, align 2
%conv15 = sext i16 %tmp1 to i32
br label %land.rhs11

while.cond.preheader: ; preds = %entry
%1 = icmp eq %struct.head_s* %list, null
br i1 %1, label %return, label %land.rhs.preheader

land.rhs.preheader: ; preds = %while.cond.preheader
br label %land.rhs

while.body: ; preds = %land.rhs
%next4 = bitcast %struct.head_s* %list.addr.033 to %struct.head_s**
%tmp4 = load %struct.head_s*, %struct.head_s** %next4, align 4
%tobool = icmp eq %struct.head_s* %tmp4, null
br i1 %tobool, label %return, label %land.rhs

land.rhs: ; preds = %land.rhs.preheader, %while.body
%list.addr.033 = phi %struct.head_s* [ %tmp4, %while.body ], [ %list, %land.rhs.preheader ]
%info2 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.033, i32 0, i32 1
Expand All @@ -47,13 +43,13 @@
%tmp3 = load i16, i16* %idx3, align 2
%cmp7 = icmp eq i16 %tmp3, %tmp
br i1 %cmp7, label %return, label %while.body

while.body19: ; preds = %land.rhs11
%next205 = bitcast %struct.head_s* %list.addr.136 to %struct.head_s**
%tmp8 = load %struct.head_s*, %struct.head_s** %next205, align 4
%tobool10 = icmp eq %struct.head_s* %tmp8, null
br i1 %tobool10, label %return, label %land.rhs11

land.rhs11: ; preds = %while.body19, %land.rhs11.lr.ph
%list.addr.136 = phi %struct.head_s* [ %list, %land.rhs11.lr.ph ], [ %tmp8, %while.body19 ]
%info12 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.136, i32 0, i32 1
Expand All @@ -64,18 +60,16 @@
%and = zext i16 %2 to i32
%cmp16 = icmp eq i32 %and, %conv15
br i1 %cmp16, label %return, label %while.body19

return: ; preds = %land.rhs11, %while.body19, %land.rhs, %while.body, %while.cond.preheader, %while.cond9.preheader
%retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ %list.addr.033, %land.rhs ], [ null, %while.body ], [ %list.addr.136, %land.rhs11 ], [ null, %while.body19 ]
%retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ null, %while.body ], [ %list.addr.033, %land.rhs ], [ null, %while.body19 ], [ %list.addr.136, %land.rhs11 ]
ret %struct.head_s* %retval.0
}

attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+lob,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" }


...
---
name: search
alignment: 1
alignment: 2
exposesReturnsTwice: false
legalized: false
regBankSelected: false
Expand Down Expand Up @@ -112,73 +106,159 @@ callSites: []
constants: []
machineFunctionInfo: {}
body: |
; CHECK-LABEL: name: search
; CHECK: bb.0.entry:
; CHECK: successors: %bb.1(0x50000000), %bb.6(0x30000000)
; CHECK: liveins: $r0, $r1
; CHECK: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx)
; CHECK: t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: tBcc %bb.6, 13 /* CC::le */, killed $cpsr
; CHECK: bb.1.while.cond.preheader:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $r0, $r2
; CHECK: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 0, 4, implicit-def $itstate
; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate
; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate
; CHECK: tB %bb.2, 14 /* CC::al */, $noreg
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: liveins: $r0, $r2
; CHECK: renamable $r1 = tUXTH killed renamable $r2, 14 /* CC::al */, $noreg
; CHECK: bb.3.land.rhs:
; CHECK: successors: %bb.5(0x04000000), %bb.4(0x7c000000)
; CHECK: liveins: $r0, $r1
; CHECK: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2)
; CHECK: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3)
; CHECK: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: tBcc %bb.5, 0 /* CC::eq */, killed $cpsr
; CHECK: bb.4.while.body:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: liveins: $r0, $r1
; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4)
; CHECK: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 0, 4, implicit-def $itstate
; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate
; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate
; CHECK: tB %bb.3, 14 /* CC::al */, $noreg
; CHECK: bb.5.return:
; CHECK: liveins: $r0
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0
; CHECK: bb.6.while.cond9.preheader:
; CHECK: successors: %bb.7(0x80000000)
; CHECK: liveins: $r0, $r1
; CHECK: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 0, 4, implicit-def $itstate
; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate
; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate
; CHECK: tB %bb.7, 14 /* CC::al */, $noreg
; CHECK: bb.7.land.rhs11.lr.ph:
; CHECK: successors: %bb.8(0x80000000)
; CHECK: liveins: $r0, $r1
; CHECK: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143)
; CHECK: bb.8.land.rhs11:
; CHECK: successors: %bb.9(0x80000000)
; CHECK: liveins: $r0, $r1
; CHECK: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12)
; CHECK: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data166, align 2)
; CHECK: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 0, 8, implicit-def $itstate
; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate
; CHECK: tB %bb.9, 14 /* CC::al */, $noreg
; CHECK: bb.9.while.body19:
; CHECK: successors: %bb.8(0x80000000)
; CHECK: liveins: $r0, $r1
; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next205)
; CHECK: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 0, 4, implicit-def $itstate
; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate
; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate
; CHECK: tB %bb.8, 14 /* CC::al */, $noreg
bb.0.entry:
successors: %bb.3(0x50000000), %bb.1(0x30000000)
successors: %bb.2(0x50000000), %bb.1(0x30000000)
liveins: $r0, $r1
renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14, $noreg :: (load 2 from %ir.idx)
t2CMPri renamable $r2, -1, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.1, 13, killed $cpsr
bb.3.while.cond.preheader:
successors: %bb.4(0x80000000)
renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx)
t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2Bcc %bb.1, 13 /* CC::le */, killed $cpsr
bb.2.while.cond.preheader:
successors: %bb.3(0x50000000)
liveins: $r0, $r2
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2IT 0, 4, implicit-def $itstate
renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate
tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate
renamable $r1 = tUXTH killed renamable $r2, 14, $noreg
renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate
tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate
t2B %bb.3, 14 /* CC::al */, $noreg
bb.3:
successors: %bb.4(0x80000000)
liveins: $r0, $r2
renamable $r1 = tUXTH killed renamable $r2, 14 /* CC::al */, $noreg
bb.4.land.rhs:
successors: %bb.6(0x04000000), %bb.5(0x7c000000)
successors: %bb.9(0x04000000), %bb.5(0x7c000000)
liveins: $r0, $r1
renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info2)
renamable $r2 = tLDRHi killed renamable $r2, 1, 14, $noreg :: (load 2 from %ir.idx3)
tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.6, 0, killed $cpsr
renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2)
renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3)
tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2Bcc %bb.9, 0 /* CC::eq */, killed $cpsr
bb.5.while.body:
successors: %bb.4(0x7c000000)
liveins: $r0, $r1
renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next4)
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4)
tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2IT 0, 4, implicit-def $itstate
renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate
tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate
t2B %bb.4, 14, $noreg
bb.6.return:
renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate
tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate
t2B %bb.4, 14 /* CC::al */, $noreg
bb.9.return:
liveins: $r0
tBX_RET 14, $noreg, implicit $r0
tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0
bb.1.while.cond9.preheader:
successors: %bb.2(0x80000000)
successors: %bb.7(0x50000000)
liveins: $r0, $r1
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2IT 0, 4, implicit-def $itstate
renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate
tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate
renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14, $noreg :: (load 2 from %ir.data16143)
bb.2.land.rhs11:
successors: %bb.2(0x7c000000)
renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate
tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate
t2B %bb.7, 14 /* CC::al */, $noreg
bb.7.land.rhs11.lr.ph:
successors: %bb.8(0x80000000)
liveins: $r0, $r1
renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143)
bb.8.land.rhs11:
successors: %bb.6(0x80000000)
liveins: $r0, $r1
renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info12)
renamable $r2 = tLDRBi killed renamable $r2, 0, 14, $noreg :: (load 1 from %ir.data166, align 2)
tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr
renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12)
renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data166, align 2)
tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2IT 0, 8, implicit-def $itstate
tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate
renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next205)
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate
t2B %bb.6, 14 /* CC::al */, $noreg
bb.6.while.body19:
successors: %bb.8(0x7c000000)
liveins: $r0, $r1
renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next205)
tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2IT 0, 4, implicit-def $itstate
renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate
tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate
t2B %bb.2, 14, $noreg
renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate
tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate
t2B %bb.8, 14 /* CC::al */, $noreg
...
198 changes: 152 additions & 46 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir
Original file line number Diff line number Diff line change
@@ -1,49 +1,39 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s --check-prefix=CHECK-LOB
# RUN: llc -mtriple=thumbv8.1m.main -mattr=-lob %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s --check-prefix=CHECK-NOLOB

# CHECK-NOLOB-NOT: t2LE

# CHECK-LOB: bb.3.land.rhs:
# CHECK-LOB: tCBZ $r0, %bb.8
# CHECK-LOB: t2LE %bb.3
# CHECK-LOB: bb.6.land.rhs11:
# CHECK-LOB: bb.7.while.body19:
# CHECK-LOB: tCBZ $r0, %bb.8
# CHECK-LOB: t2LE %bb.6
# CHECK-LOB: bb.8:

--- |
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-unknown-unknown"

%struct.head_s = type { %struct.head_s*, %struct.data_s* }
%struct.data_s = type { i16, i16 }

; Function Attrs: norecurse nounwind readonly
define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr #0 {
define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr {
entry:
%idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1
%0 = load i16, i16* %idx, align 2
%cmp = icmp sgt i16 %0, -1
br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader

while.cond9.preheader: ; preds = %entry
%1 = icmp eq %struct.head_s* %list, null
br i1 %1, label %return, label %land.rhs11.lr.ph

land.rhs11.lr.ph: ; preds = %while.cond9.preheader
%data16143 = bitcast %struct.data_s* %info to i16*
%2 = load i16, i16* %data16143, align 2
%conv15 = sext i16 %2 to i32
br label %land.rhs11

while.cond.preheader: ; preds = %entry
%3 = icmp eq %struct.head_s* %list, null
br i1 %3, label %return, label %land.rhs.preheader

land.rhs.preheader: ; preds = %while.cond.preheader
br label %land.rhs

land.rhs: ; preds = %land.rhs.preheader, %while.body
%list.addr.033 = phi %struct.head_s* [ %6, %while.body ], [ %list, %land.rhs.preheader ]
%info2 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.033, i32 0, i32 1
Expand All @@ -52,13 +42,13 @@
%5 = load i16, i16* %idx3, align 2
%cmp7 = icmp eq i16 %5, %0
br i1 %cmp7, label %return, label %while.body

while.body: ; preds = %land.rhs
%next4 = bitcast %struct.head_s* %list.addr.033 to %struct.head_s**
%6 = load %struct.head_s*, %struct.head_s** %next4, align 4
%tobool = icmp eq %struct.head_s* %6, null
br i1 %tobool, label %return, label %land.rhs

land.rhs11: ; preds = %while.body19, %land.rhs11.lr.ph
%list.addr.136 = phi %struct.head_s* [ %list, %land.rhs11.lr.ph ], [ %10, %while.body19 ]
%info12 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.136, i32 0, i32 1
Expand All @@ -69,20 +59,18 @@
%and = zext i16 %9 to i32
%cmp16 = icmp eq i32 %and, %conv15
br i1 %cmp16, label %return, label %while.body19

while.body19: ; preds = %land.rhs11
%next206 = bitcast %struct.head_s* %list.addr.136 to %struct.head_s**
%10 = load %struct.head_s*, %struct.head_s** %next206, align 4
%tobool10 = icmp eq %struct.head_s* %10, null
br i1 %tobool10, label %return, label %land.rhs11

return: ; preds = %while.body19, %land.rhs11, %while.body, %land.rhs, %while.cond.preheader, %while.cond9.preheader
%retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ %list.addr.033, %land.rhs ], [ null, %while.body ], [ %list.addr.136, %land.rhs11 ], [ null, %while.body19 ]
ret %struct.head_s* %retval.0
}

attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+lob,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" }


...
---
name: search
Expand Down Expand Up @@ -123,79 +111,197 @@ callSites: []
constants: []
machineFunctionInfo: {}
body: |
; CHECK-LOB-LABEL: name: search
; CHECK-LOB: bb.0.entry:
; CHECK-LOB: successors: %bb.1(0x50000000), %bb.5(0x30000000)
; CHECK-LOB: liveins: $r0, $r1
; CHECK-LOB: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx)
; CHECK-LOB: t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK-LOB: tBcc %bb.5, 13 /* CC::le */, killed $cpsr
; CHECK-LOB: bb.1.while.cond.preheader:
; CHECK-LOB: successors: %bb.9(0x30000000), %bb.2(0x50000000)
; CHECK-LOB: liveins: $r0, $r2
; CHECK-LOB: tCBZ $r0, %bb.9
; CHECK-LOB: bb.2.land.rhs.preheader:
; CHECK-LOB: successors: %bb.3(0x80000000)
; CHECK-LOB: liveins: $r0, $r2
; CHECK-LOB: renamable $r1 = tUXTH killed renamable $r2, 14 /* CC::al */, $noreg
; CHECK-LOB: bb.3.land.rhs:
; CHECK-LOB: successors: %bb.4(0x80000000)
; CHECK-LOB: liveins: $r0, $r1
; CHECK-LOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2)
; CHECK-LOB: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3)
; CHECK-LOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK-LOB: t2IT 0, 8, implicit-def $itstate
; CHECK-LOB: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate
; CHECK-LOB: bb.4.while.body:
; CHECK-LOB: successors: %bb.9(0x04000000), %bb.3(0x7c000000)
; CHECK-LOB: liveins: $r0, $r1
; CHECK-LOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4)
; CHECK-LOB: tCBZ $r0, %bb.9
; CHECK-LOB: t2LE %bb.3
; CHECK-LOB: bb.5.while.cond9.preheader:
; CHECK-LOB: successors: %bb.9(0x30000000), %bb.6(0x50000000)
; CHECK-LOB: liveins: $r0, $r1
; CHECK-LOB: tCBZ $r0, %bb.9
; CHECK-LOB: bb.6.land.rhs11.lr.ph:
; CHECK-LOB: successors: %bb.7(0x80000000)
; CHECK-LOB: liveins: $r0, $r1
; CHECK-LOB: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143)
; CHECK-LOB: bb.7.land.rhs11:
; CHECK-LOB: successors: %bb.10(0x04000000), %bb.8(0x7c000000)
; CHECK-LOB: liveins: $r0, $r1
; CHECK-LOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12)
; CHECK-LOB: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data165, align 2)
; CHECK-LOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK-LOB: tBcc %bb.10, 0 /* CC::eq */, killed $cpsr
; CHECK-LOB: bb.8.while.body19:
; CHECK-LOB: successors: %bb.9(0x04000000), %bb.7(0x7c000000)
; CHECK-LOB: liveins: $r0, $r1
; CHECK-LOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next206)
; CHECK-LOB: tCBZ $r0, %bb.9
; CHECK-LOB: t2LE %bb.7
; CHECK-LOB: bb.9:
; CHECK-LOB: successors: %bb.10(0x80000000)
; CHECK-LOB: renamable $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
; CHECK-LOB: bb.10.return:
; CHECK-LOB: liveins: $r0
; CHECK-LOB: tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0
; CHECK-NOLOB-LABEL: name: search
; CHECK-NOLOB: bb.0.entry:
; CHECK-NOLOB: successors: %bb.1(0x50000000), %bb.5(0x30000000)
; CHECK-NOLOB: liveins: $r0, $r1
; CHECK-NOLOB: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx)
; CHECK-NOLOB: t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK-NOLOB: tBcc %bb.5, 13 /* CC::le */, killed $cpsr
; CHECK-NOLOB: bb.1.while.cond.preheader:
; CHECK-NOLOB: successors: %bb.9(0x30000000), %bb.2(0x50000000)
; CHECK-NOLOB: liveins: $r0, $r2
; CHECK-NOLOB: tCBZ $r0, %bb.9
; CHECK-NOLOB: bb.2.land.rhs.preheader:
; CHECK-NOLOB: successors: %bb.3(0x80000000)
; CHECK-NOLOB: liveins: $r0, $r2
; CHECK-NOLOB: renamable $r1 = tUXTH killed renamable $r2, 14 /* CC::al */, $noreg
; CHECK-NOLOB: bb.3.land.rhs:
; CHECK-NOLOB: successors: %bb.4(0x80000000)
; CHECK-NOLOB: liveins: $r0, $r1
; CHECK-NOLOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2)
; CHECK-NOLOB: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3)
; CHECK-NOLOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK-NOLOB: t2IT 0, 8, implicit-def $itstate
; CHECK-NOLOB: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate
; CHECK-NOLOB: bb.4.while.body:
; CHECK-NOLOB: successors: %bb.9(0x04000000), %bb.3(0x7c000000)
; CHECK-NOLOB: liveins: $r0, $r1
; CHECK-NOLOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4)
; CHECK-NOLOB: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK-NOLOB: tBcc %bb.3, 1 /* CC::ne */, killed $cpsr
; CHECK-NOLOB: tB %bb.9, 14 /* CC::al */, $noreg
; CHECK-NOLOB: bb.5.while.cond9.preheader:
; CHECK-NOLOB: successors: %bb.9(0x30000000), %bb.6(0x50000000)
; CHECK-NOLOB: liveins: $r0, $r1
; CHECK-NOLOB: tCBZ $r0, %bb.9
; CHECK-NOLOB: bb.6.land.rhs11.lr.ph:
; CHECK-NOLOB: successors: %bb.7(0x80000000)
; CHECK-NOLOB: liveins: $r0, $r1
; CHECK-NOLOB: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143)
; CHECK-NOLOB: bb.7.land.rhs11:
; CHECK-NOLOB: successors: %bb.10(0x04000000), %bb.8(0x7c000000)
; CHECK-NOLOB: liveins: $r0, $r1
; CHECK-NOLOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12)
; CHECK-NOLOB: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data165, align 2)
; CHECK-NOLOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK-NOLOB: tBcc %bb.10, 0 /* CC::eq */, killed $cpsr
; CHECK-NOLOB: bb.8.while.body19:
; CHECK-NOLOB: successors: %bb.9(0x04000000), %bb.7(0x7c000000)
; CHECK-NOLOB: liveins: $r0, $r1
; CHECK-NOLOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next206)
; CHECK-NOLOB: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK-NOLOB: tBcc %bb.7, 1 /* CC::ne */, killed $cpsr
; CHECK-NOLOB: bb.9:
; CHECK-NOLOB: successors: %bb.10(0x80000000)
; CHECK-NOLOB: renamable $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
; CHECK-NOLOB: bb.10.return:
; CHECK-NOLOB: liveins: $r0
; CHECK-NOLOB: tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0
bb.0.entry:
successors: %bb.5(0x50000000), %bb.1(0x30000000)
liveins: $r0, $r1
renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14, $noreg :: (load 2 from %ir.idx)
t2CMPri renamable $r2, -1, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.1, 13, killed $cpsr
bb.5.while.cond.preheader:
successors: %bb.8(0x30000000), %bb.6(0x50000000)
liveins: $r0, $r2
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.8, 0, killed $cpsr
bb.6.land.rhs.preheader:
successors: %bb.7(0x80000000)
liveins: $r0, $r2
renamable $r1 = tUXTH killed renamable $r2, 14, $noreg
bb.7.land.rhs:
successors: %bb.8(0x04000000), %bb.7(0x7c000000)
successors: %bb.10(0x80000000)
liveins: $r0, $r1
renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info2)
renamable $r2 = tLDRHi killed renamable $r2, 1, 14, $noreg :: (load 2 from %ir.idx3)
tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr
t2IT 0, 8, implicit-def $itstate
tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate
bb.10.while.body:
successors: %bb.8(0x04000000), %bb.7(0x7c000000)
liveins: $r0, $r1
renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next4)
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.7, 1, killed $cpsr
t2B %bb.8, 14, $noreg
bb.1.while.cond9.preheader:
successors: %bb.8(0x30000000), %bb.2(0x50000000)
liveins: $r0, $r1
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.8, 0, killed $cpsr
bb.2.land.rhs11.lr.ph:
successors: %bb.3(0x80000000)
liveins: $r0, $r1
renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14, $noreg :: (load 2 from %ir.data16143)
bb.3.land.rhs11:
successors: %bb.9(0x04000000), %bb.4(0x7c000000)
liveins: $r0, $r1
renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info12)
renamable $r2 = tLDRBi killed renamable $r2, 0, 14, $noreg :: (load 1 from %ir.data165, align 2)
tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.9, 0, killed $cpsr
bb.4.while.body19:
successors: %bb.8(0x04000000), %bb.3(0x7c000000)
liveins: $r0, $r1
renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next206)
tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.3, 1, killed $cpsr
bb.8:
successors: %bb.9(0x80000000)
renamable $r0, dead $cpsr = tMOVi8 0, 14, $noreg
bb.9.return:
liveins: $r0
tBX_RET 14, $noreg, implicit killed $r0
...
26 changes: 17 additions & 9 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ body: |
; CHECK: t2IT 0, 4, implicit-def $itstate
; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate
; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate
; CHECK: bb.1.vector.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $lr, $r0, $r1, $r2, $r7
; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
Expand All @@ -120,8 +123,8 @@ body: |
; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: dead $lr = t2DLS renamable $r12
; CHECK: $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg
; CHECK: bb.1.vector.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: bb.2.vector.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $q0, $r0, $r1, $r2, $r3
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg
; CHECK: MVE_VPST 4, implicit $vpr
Expand All @@ -132,8 +135,8 @@ body: |
; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg
; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1
; CHECK: bb.2.middle.block:
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2
; CHECK: bb.3.middle.block:
; CHECK: liveins: $q0
; CHECK: renamable $r0 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit killed $r0
Expand All @@ -145,6 +148,11 @@ body: |
t2IT 0, 4, implicit-def $itstate
renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate
tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate
bb.1.vector.ph:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2, $lr, $r7
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
Expand All @@ -158,8 +166,8 @@ body: |
t2DoLoopStart renamable $r12
$r3 = tMOVr killed $r12, 14, $noreg
bb.1.vector.body:
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $q0, $r0, $r1, $r2, $r3
renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg
Expand All @@ -172,10 +180,10 @@ body: |
renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg
renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14, $noreg
t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.2.middle.block:
bb.3.middle.block:
liveins: $q0
renamable $r0 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg
Expand Down
28 changes: 18 additions & 10 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,15 @@ body: |
; CHECK-LABEL: name: non_masked_load
; CHECK: bb.0.entry:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $lr, $r0, $r1, $r2
; CHECK: liveins: $lr, $r0, $r1, $r2, $r7
; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 0, 2, implicit-def $itstate
; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate
; CHECK: renamable $r0 = tUXTB killed renamable $r0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate
; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate
; CHECK: bb.1.vector.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $lr, $r0, $r1, $r2
; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $lr, implicit-def $sp, implicit $sp
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
Expand All @@ -129,8 +132,8 @@ body: |
; CHECK: renamable $r3 = t2LSRri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r3 = t2SUBrs renamable $r2, killed renamable $r3, 34, 14 /* CC::al */, $noreg, $noreg
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: bb.1.vector.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: bb.2.vector.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3
; CHECK: renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg
; CHECK: $q1 = MVE_VORR killed $q0, killed $q0, 0, $noreg, undef $q1
Expand All @@ -140,8 +143,8 @@ body: |
; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg
; CHECK: renamable $q2 = MVE_VADDi8 killed renamable $q2, renamable $q1, 0, $noreg, undef renamable $q2
; CHECK: renamable $q0 = MVE_VADDi8 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1
; CHECK: bb.2.middle.block:
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
; CHECK: bb.3.middle.block:
; CHECK: liveins: $q0, $q1, $r3
; CHECK: renamable $vpr = MVE_VCTP8 killed renamable $r3, 0, $noreg
; CHECK: renamable $q0 = MVE_VPSEL killed renamable $q0, killed renamable $q1, 0, killed renamable $vpr
Expand All @@ -158,6 +161,11 @@ body: |
renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate
renamable $r0 = tUXTB killed renamable $r0, 0, $cpsr, implicit killed $r0, implicit $itstate
tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate
bb.1.vector.ph:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2, $lr
frame-setup tPUSH 14, $noreg, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
Expand All @@ -174,8 +182,8 @@ body: |
renamable $r3 = t2SUBrs renamable $r2, killed renamable $r3, 34, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
bb.1.vector.body:
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $lr, $q0, $r0, $r1, $r2, $r3
renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg
Expand All @@ -187,10 +195,10 @@ body: |
renamable $q2 = MVE_VADDi8 killed renamable $q2, renamable $q1, 0, $noreg, undef renamable $q2
renamable $lr = t2LoopDec killed renamable $lr, 1
renamable $q0 = MVE_VADDi8 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0
t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14, $noreg
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.2.middle.block:
bb.3.middle.block:
liveins: $q0, $q1, $r3
renamable $vpr = MVE_VCTP8 killed renamable $r3, 0, $noreg
Expand Down
26 changes: 17 additions & 9 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,17 @@ body: |
; CHECK: tCMPi8 renamable $r3, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 0, 8, implicit-def $itstate
; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
; CHECK: bb.1.vector.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $r0, $r1, $r2, $r3
; CHECK: renamable $r12 = t2ADDri renamable $r3, 15, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r12 = t2BICri killed renamable $r12, 15, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 16, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 35, 14 /* CC::al */, $noreg, $noreg
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: bb.1.vector.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: bb.2.vector.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $lr, $r0, $r1, $r2, $r3
; CHECK: renamable $vpr = MVE_VCTP8 renamable $r3, 0, $noreg
; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg
Expand All @@ -126,8 +129,8 @@ body: |
; CHECK: renamable $r2, renamable $q1 = MVE_VLDRBU8_post killed renamable $r2, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv1618, align 1)
; CHECK: renamable $q0 = MVE_VADDi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
; CHECK: renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 1, $noreg :: (store 16 into %ir.lsr.iv1921, align 1)
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1
; CHECK: bb.2.for.cond.cleanup:
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
; CHECK: bb.3.for.cond.cleanup:
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
bb.0.entry:
successors: %bb.1(0x80000000)
Expand All @@ -142,15 +145,20 @@ body: |
tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr
t2IT 0, 8, implicit-def $itstate
tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate
bb.1.vector.ph:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2, $r3, $lr
renamable $r12 = t2ADDri renamable $r3, 15, 14, $noreg, $noreg
renamable $lr = t2MOVi 1, 14, $noreg, $noreg
renamable $r12 = t2BICri killed renamable $r12, 15, 14, $noreg, $noreg
renamable $r12 = t2SUBri killed renamable $r12, 16, 14, $noreg, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 35, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
bb.1.vector.body:
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $lr, $r0, $r1, $r2, $r3
renamable $vpr = MVE_VCTP8 renamable $r3, 0, $noreg
Expand All @@ -161,10 +169,10 @@ body: |
renamable $lr = t2LoopDec killed renamable $lr, 1
renamable $q0 = MVE_VADDi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 1, $noreg :: (store 16 into %ir.lsr.iv1921, align 1)
t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14, $noreg
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.2.for.cond.cleanup:
bb.3.for.cond.cleanup:
tPOP_RET 14, $noreg, def $r7, def $pc
...
49 changes: 28 additions & 21 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_add_add_v16i8(i8* nocaptur
; CHECK-NEXT: moveq r0, #0
; CHECK-NEXT: uxtbeq r0, r0
; CHECK-NEXT: bxeq lr
; CHECK-NEXT: .LBB0_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: add.w r3, r2, #15
; CHECK-NEXT: vmov.i32 q1, #0x0
Expand All @@ -17,7 +18,7 @@ define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_add_add_v16i8(i8* nocaptur
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #4
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB0_1: @ %vector.body
; CHECK-NEXT: .LBB0_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.8 r2
; CHECK-NEXT: vmov q0, q1
Expand All @@ -28,8 +29,8 @@ define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_add_add_v16i8(i8* nocaptur
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrbt.u8 q2, [r1], #16
; CHECK-NEXT: vadd.i8 q1, q1, q2
; CHECK-NEXT: le lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %middle.block
; CHECK-NEXT: le lr, .LBB0_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vaddv.u8 r0, q0
; CHECK-NEXT: pop.w {r7, lr}
Expand Down Expand Up @@ -79,6 +80,7 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_add_add_v8i16(i8* nocaptu
; CHECK-NEXT: moveq r0, #0
; CHECK-NEXT: sxtheq r0, r0
; CHECK-NEXT: bxeq lr
; CHECK-NEXT: .LBB1_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: adds r3, r2, #7
; CHECK-NEXT: vmov.i32 q1, #0x0
Expand All @@ -87,7 +89,7 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_add_add_v8i16(i8* nocaptu
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r12, lsr #3
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB1_1: @ %vector.body
; CHECK-NEXT: .LBB1_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.16 r2
; CHECK-NEXT: vmov q0, q1
Expand All @@ -98,8 +100,8 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_add_add_v8i16(i8* nocaptu
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrbt.u16 q2, [r1], #8
; CHECK-NEXT: vadd.i16 q1, q1, q2
; CHECK-NEXT: le lr, .LBB1_1
; CHECK-NEXT: @ %bb.2: @ %middle.block
; CHECK-NEXT: le lr, .LBB1_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: vaddv.u16 r0, q0
; CHECK-NEXT: pop.w {r7, lr}
Expand Down Expand Up @@ -151,17 +153,18 @@ define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_sub_add_v16i8(i8* nocaptur
; CHECK-NEXT: moveq r0, #0
; CHECK-NEXT: uxtbeq r0, r0
; CHECK-NEXT: bxeq lr
; CHECK-NEXT: .LBB2_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: dlstp.8 lr, r2
; CHECK-NEXT: .LBB2_1: @ %vector.body
; CHECK-NEXT: .LBB2_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrb.u8 q1, [r1], #16
; CHECK-NEXT: vldrb.u8 q2, [r0], #16
; CHECK-NEXT: vsub.i8 q1, q2, q1
; CHECK-NEXT: vadd.i8 q0, q1, q0
; CHECK-NEXT: letp lr, .LBB2_1
; CHECK-NEXT: @ %bb.2: @ %middle.block
; CHECK-NEXT: letp lr, .LBB2_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vaddv.u8 r0, q0
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: uxtb r0, r0
Expand Down Expand Up @@ -210,17 +213,18 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_sub_add_v8i16(i8* nocaptu
; CHECK-NEXT: moveq r0, #0
; CHECK-NEXT: sxtheq r0, r0
; CHECK-NEXT: bxeq lr
; CHECK-NEXT: .LBB3_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: dlstp.16 lr, r2
; CHECK-NEXT: .LBB3_1: @ %vector.body
; CHECK-NEXT: .LBB3_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrb.u16 q1, [r0], #8
; CHECK-NEXT: vldrb.u16 q2, [r1], #8
; CHECK-NEXT: vsub.i16 q1, q2, q1
; CHECK-NEXT: vadd.i16 q0, q1, q0
; CHECK-NEXT: letp lr, .LBB3_1
; CHECK-NEXT: @ %bb.2: @ %middle.block
; CHECK-NEXT: letp lr, .LBB3_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vaddv.u16 r0, q0
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: sxth r0, r0
Expand Down Expand Up @@ -271,17 +275,18 @@ define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_mul_add_v16i8(i8* nocaptur
; CHECK-NEXT: moveq r0, #0
; CHECK-NEXT: uxtbeq r0, r0
; CHECK-NEXT: bxeq lr
; CHECK-NEXT: .LBB4_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: dlstp.8 lr, r2
; CHECK-NEXT: .LBB4_1: @ %vector.body
; CHECK-NEXT: .LBB4_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrb.u8 q1, [r0], #16
; CHECK-NEXT: vldrb.u8 q2, [r1], #16
; CHECK-NEXT: vmul.i8 q1, q2, q1
; CHECK-NEXT: vadd.i8 q0, q1, q0
; CHECK-NEXT: letp lr, .LBB4_1
; CHECK-NEXT: @ %bb.2: @ %middle.block
; CHECK-NEXT: letp lr, .LBB4_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vaddv.u8 r0, q0
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: uxtb r0, r0
Expand Down Expand Up @@ -330,17 +335,18 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_mul_add_v8i16(i8* nocaptu
; CHECK-NEXT: moveq r0, #0
; CHECK-NEXT: sxtheq r0, r0
; CHECK-NEXT: bxeq lr
; CHECK-NEXT: .LBB5_1: @ %vector.ph
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: dlstp.16 lr, r2
; CHECK-NEXT: .LBB5_1: @ %vector.body
; CHECK-NEXT: .LBB5_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrb.u16 q1, [r0], #8
; CHECK-NEXT: vldrb.u16 q2, [r1], #8
; CHECK-NEXT: vmul.i16 q1, q2, q1
; CHECK-NEXT: vadd.i16 q0, q1, q0
; CHECK-NEXT: letp lr, .LBB5_1
; CHECK-NEXT: @ %bb.2: @ %middle.block
; CHECK-NEXT: letp lr, .LBB5_2
; CHECK-NEXT: @ %bb.3: @ %middle.block
; CHECK-NEXT: vaddv.u16 r0, q0
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: sxth r0, r0
Expand Down Expand Up @@ -636,6 +642,7 @@ define i32 @wrongop(%struct.date* nocapture readonly %pd) {
; CHECK-NEXT: cmp r3, #1
; CHECK-NEXT: it lt
; CHECK-NEXT: poplt {r4, pc}
; CHECK-NEXT: .LBB8_5: @ %vector.ph
; CHECK-NEXT: movw r1, :lower16:days
; CHECK-NEXT: movt r1, :upper16:days
; CHECK-NEXT: movs r2, #52
Expand All @@ -645,12 +652,12 @@ define i32 @wrongop(%struct.date* nocapture readonly %pd) {
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: subs r0, r3, #1
; CHECK-NEXT: dlstp.32 lr, r0
; CHECK-NEXT: .LBB8_5: @ %vector.body
; CHECK-NEXT: .LBB8_6: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r1], #16
; CHECK-NEXT: vadd.i32 q1, q0, q1
; CHECK-NEXT: letp lr, .LBB8_5
; CHECK-NEXT: @ %bb.6: @ %middle.block
; CHECK-NEXT: letp lr, .LBB8_6
; CHECK-NEXT: @ %bb.7: @ %middle.block
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vaddv.u32 r0, q0
; CHECK-NEXT: pop {r4, pc}
Expand Down
52 changes: 34 additions & 18 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/safe-retaining.mir
Original file line number Diff line number Diff line change
Expand Up @@ -125,20 +125,23 @@ body: |
; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 11, 8, implicit-def $itstate
; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
; CHECK: bb.1.loop.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $r0, $r1, $r2, $r3
; CHECK: renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
; CHECK: dead $lr = MVE_DLSTP_32 killed renamable $r3
; CHECK: $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg
; CHECK: bb.1.loop.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: bb.2.loop.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $r0, $r1, $r2, $r12
; CHECK: $lr = tMOVr $r12, 14 /* CC::al */, $noreg
; CHECK: renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.addr.b, align 4)
; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.addr.a, align 4)
; CHECK: renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg
; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 0, killed $noreg :: (store 16 into %ir.addr.c, align 4)
; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.1
; CHECK: bb.2.exit:
; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.2
; CHECK: bb.3.exit:
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
bb.0.entry:
successors: %bb.1(0x80000000)
Expand All @@ -151,12 +154,17 @@ body: |
tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2IT 11, 8, implicit-def $itstate
tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
bb.1.loop.ph:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2, $r3, $r4, $lr
renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r4
$r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg
bb.1.loop.body:
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
bb.2.loop.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $r0, $r1, $r2, $r3, $r12
renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
Expand All @@ -170,10 +178,10 @@ body: |
renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg
MVE_VPST 8, implicit $vpr
renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4)
t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14 /* CC::al */, $noreg
t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14 /* CC::al */, $noreg
bb.2.exit:
bb.3.exit:
tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
...
Expand Down Expand Up @@ -217,11 +225,14 @@ body: |
; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 11, 8, implicit-def $itstate
; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
; CHECK: bb.1.loop.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $r0, $r1, $r2, $r3
; CHECK: renamable $r12 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
; CHECK: dead $lr = MVE_DLSTP_16 killed renamable $r3
; CHECK: $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg
; CHECK: bb.1.loop.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: bb.2.loop.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $r0, $r1, $r2, $r4
; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg
; CHECK: renamable $r4, dead $cpsr = tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg
Expand All @@ -230,8 +241,8 @@ body: |
; CHECK: $r0 = tMOVr $r1, 14 /* CC::al */, $noreg
; CHECK: renamable $q1 = MVE_VQSHRUNs16th killed renamable $q1, killed renamable $q0, 1, 0, $noreg
; CHECK: renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 0, killed $noreg :: (store 16 into %ir.addr.c, align 2)
; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.1
; CHECK: bb.2.exit:
; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.2
; CHECK: bb.3.exit:
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
bb.0.entry:
successors: %bb.1(0x80000000)
Expand All @@ -244,12 +255,17 @@ body: |
tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2IT 11, 8, implicit-def $itstate
tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
bb.1.loop.ph:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2, $r3, $r4, $lr
renamable $r12 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r12
$r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg
bb.1.loop.body:
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
bb.2.loop.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $r0, $r1, $r2, $r3, $r4
renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg
Expand All @@ -264,10 +280,10 @@ body: |
renamable $q1 = MVE_VQSHRUNs16th killed renamable $q1, killed renamable $q0, 1, 0, $noreg
MVE_VPST 8, implicit $vpr
renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 2)
t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14 /* CC::al */, $noreg
t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14 /* CC::al */, $noreg
bb.2.exit:
bb.3.exit:
tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
...
26 changes: 17 additions & 9 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir
Original file line number Diff line number Diff line change
Expand Up @@ -117,20 +117,23 @@ body: |
; CHECK: tCMPi8 $r3, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 0, 8, implicit-def $itstate
; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
; CHECK: bb.1.for.body.preheader:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $r0, $r1, $r2, $r3
; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg
; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
; CHECK: renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14 /* CC::al */, $noreg
; CHECK: $lr = t2DLS killed $r3
; CHECK: bb.1.for.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: bb.2.for.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $lr, $r0, $r1, $r2
; CHECK: dead renamable $r3 = SPACE 4070, undef renamable $r0
; CHECK: renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep3)
; CHECK: renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep7)
; CHECK: renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14 /* CC::al */, $noreg
; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep11)
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1
; CHECK: bb.2.for.cond.cleanup:
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
; CHECK: bb.3.for.cond.cleanup:
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
bb.0.entry:
successors: %bb.1(0x80000000)
Expand All @@ -143,14 +146,19 @@ body: |
tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr
t2IT 0, 8, implicit-def $itstate
tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate
bb.1.for.body.preheader:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2, $r3, $r7, $lr
renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg
renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg
renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg
$lr = tMOVr $r3, 14, $noreg
t2DoLoopStart killed $r3
bb.1.for.body:
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
bb.2.for.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $lr, $r0, $r1, $r2
dead renamable $r3 = SPACE 4070, undef renamable $r0
Expand All @@ -159,10 +167,10 @@ body: |
renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg
early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep11)
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14, $noreg
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.2.for.cond.cleanup:
bb.3.for.cond.cleanup:
tPOP_RET 14, $noreg, def $r7, def $pc
...
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,16 @@ define arm_aapcs_vfpcc void @uadd_sat(i16* noalias nocapture readonly %pSrcA, i1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB0_1: @ %vector.ph
; CHECK-NEXT: dlstp.16 lr, r3
; CHECK-NEXT: .LBB0_1: @ %vector.body
; CHECK-NEXT: .LBB0_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrh.u16 q0, [r1], #16
; CHECK-NEXT: vldrh.u16 q1, [r0], #16
; CHECK-NEXT: vqadd.u16 q0, q1, q0
; CHECK-NEXT: vstrh.16 q0, [r2], #16
; CHECK-NEXT: letp lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %while.end
; CHECK-NEXT: letp lr, .LBB0_2
; CHECK-NEXT: @ %bb.3: @ %while.end
; CHECK-NEXT: pop {r7, pc}
entry:
%cmp7 = icmp eq i32 %blockSize, 0
Expand Down Expand Up @@ -58,15 +59,16 @@ define arm_aapcs_vfpcc void @sadd_sat(i16* noalias nocapture readonly %pSrcA, i1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB1_1: @ %vector.ph
; CHECK-NEXT: dlstp.16 lr, r3
; CHECK-NEXT: .LBB1_1: @ %vector.body
; CHECK-NEXT: .LBB1_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrh.u16 q0, [r1], #16
; CHECK-NEXT: vldrh.u16 q1, [r0], #16
; CHECK-NEXT: vqadd.s16 q0, q1, q0
; CHECK-NEXT: vstrh.16 q0, [r2], #16
; CHECK-NEXT: letp lr, .LBB1_1
; CHECK-NEXT: @ %bb.2: @ %while.end
; CHECK-NEXT: letp lr, .LBB1_2
; CHECK-NEXT: @ %bb.3: @ %while.end
; CHECK-NEXT: pop {r7, pc}
entry:
%cmp7 = icmp eq i32 %blockSize, 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@ define arm_aapcs_vfpcc void @fabs(float* noalias nocapture readonly %pSrcA, floa
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB0_1: @ %vector.ph
; CHECK-NEXT: dlstp.32 lr, r2
; CHECK-NEXT: .LBB0_1: @ %vector.body
; CHECK-NEXT: .LBB0_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
; CHECK-NEXT: vabs.f32 q0, q0
; CHECK-NEXT: vstrw.32 q0, [r1], #16
; CHECK-NEXT: letp lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %while.end
; CHECK-NEXT: letp lr, .LBB0_2
; CHECK-NEXT: @ %bb.3: @ %while.end
; CHECK-NEXT: pop {r7, pc}
entry:
%cmp3 = icmp eq i32 %blockSize, 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@ define arm_aapcs_vfpcc void @round(float* noalias nocapture readonly %pSrcA, flo
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB0_1: @ %vector.ph
; CHECK-NEXT: dlstp.32 lr, r2
; CHECK-NEXT: .LBB0_1: @ %vector.body
; CHECK-NEXT: .LBB0_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
; CHECK-NEXT: vrinta.f32 q0, q0
; CHECK-NEXT: vstrw.32 q0, [r1], #16
; CHECK-NEXT: letp lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: letp lr, .LBB0_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
entry:
%cmp5 = icmp eq i32 %n, 0
Expand Down Expand Up @@ -54,14 +55,15 @@ define arm_aapcs_vfpcc void @rint(float* noalias nocapture readonly %pSrcA, floa
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB1_1: @ %vector.ph
; CHECK-NEXT: dlstp.32 lr, r2
; CHECK-NEXT: .LBB1_1: @ %vector.body
; CHECK-NEXT: .LBB1_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
; CHECK-NEXT: vrintx.f32 q0, q0
; CHECK-NEXT: vstrw.32 q0, [r1], #16
; CHECK-NEXT: letp lr, .LBB1_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: letp lr, .LBB1_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
entry:
%cmp5 = icmp eq i32 %n, 0
Expand Down Expand Up @@ -99,14 +101,15 @@ define arm_aapcs_vfpcc void @trunc(float* noalias nocapture readonly %pSrcA, flo
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB2_1: @ %vector.ph
; CHECK-NEXT: dlstp.32 lr, r2
; CHECK-NEXT: .LBB2_1: @ %vector.body
; CHECK-NEXT: .LBB2_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
; CHECK-NEXT: vrintz.f32 q0, q0
; CHECK-NEXT: vstrw.32 q0, [r1], #16
; CHECK-NEXT: letp lr, .LBB2_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: letp lr, .LBB2_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
entry:
%cmp5 = icmp eq i32 %n, 0
Expand Down Expand Up @@ -144,14 +147,15 @@ define arm_aapcs_vfpcc void @ceil(float* noalias nocapture readonly %pSrcA, floa
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB3_1: @ %vector.ph
; CHECK-NEXT: dlstp.32 lr, r2
; CHECK-NEXT: .LBB3_1: @ %vector.body
; CHECK-NEXT: .LBB3_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
; CHECK-NEXT: vrintp.f32 q0, q0
; CHECK-NEXT: vstrw.32 q0, [r1], #16
; CHECK-NEXT: letp lr, .LBB3_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: letp lr, .LBB3_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
entry:
%cmp5 = icmp eq i32 %n, 0
Expand Down Expand Up @@ -189,14 +193,15 @@ define arm_aapcs_vfpcc void @floor(float* noalias nocapture readonly %pSrcA, flo
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB4_1: @ %vector.ph
; CHECK-NEXT: dlstp.32 lr, r2
; CHECK-NEXT: .LBB4_1: @ %vector.body
; CHECK-NEXT: .LBB4_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
; CHECK-NEXT: vrintm.f32 q0, q0
; CHECK-NEXT: vstrw.32 q0, [r1], #16
; CHECK-NEXT: letp lr, .LBB4_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: letp lr, .LBB4_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
entry:
%cmp5 = icmp eq i32 %n, 0
Expand Down Expand Up @@ -235,6 +240,7 @@ define arm_aapcs_vfpcc void @nearbyint(float* noalias nocapture readonly %pSrcA,
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB5_1: @ %vector.ph
; CHECK-NEXT: adds r3, r2, #3
; CHECK-NEXT: vdup.32 q1, r2
; CHECK-NEXT: bic r3, r3, #3
Expand All @@ -245,7 +251,7 @@ define arm_aapcs_vfpcc void @nearbyint(float* noalias nocapture readonly %pSrcA,
; CHECK-NEXT: vldrw.u32 q0, [r3]
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB5_1: @ %vector.body
; CHECK-NEXT: .LBB5_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vadd.i32 q2, q0, r12
; CHECK-NEXT: vdup.32 q3, r12
Expand All @@ -261,11 +267,11 @@ define arm_aapcs_vfpcc void @nearbyint(float* noalias nocapture readonly %pSrcA,
; CHECK-NEXT: vrintr.f32 s12, s8
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q3, [r1], #16
; CHECK-NEXT: le lr, .LBB5_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: le lr, .LBB5_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: @ %bb.4:
; CHECK-NEXT: .LCPI5_0:
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 1 @ 0x1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,17 @@ define arm_aapcs_vfpcc void @usub_sat(i16* noalias nocapture readonly %pSrcA, i1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB0_1: @ %vector.ph
; CHECK-NEXT: subs r3, #1
; CHECK-NEXT: dlstp.16 lr, r3
; CHECK-NEXT: .LBB0_1: @ %vector.body
; CHECK-NEXT: .LBB0_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrh.u16 q0, [r1], #16
; CHECK-NEXT: vldrh.u16 q1, [r0], #16
; CHECK-NEXT: vqsub.u16 q0, q1, q0
; CHECK-NEXT: vstrh.16 q0, [r2], #16
; CHECK-NEXT: letp lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %while.end
; CHECK-NEXT: letp lr, .LBB0_2
; CHECK-NEXT: @ %bb.3: @ %while.end
; CHECK-NEXT: pop {r7, pc}
entry:
%cmp7 = icmp eq i32 %blockSize, 0
Expand Down Expand Up @@ -59,16 +60,17 @@ define arm_aapcs_vfpcc void @ssub_sat(i16* noalias nocapture readonly %pSrcA, i1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: popeq {r7, pc}
; CHECK-NEXT: .LBB1_1: @ %vector.ph
; CHECK-NEXT: subs r3, #1
; CHECK-NEXT: dlstp.16 lr, r3
; CHECK-NEXT: .LBB1_1: @ %vector.body
; CHECK-NEXT: .LBB1_2: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrh.u16 q0, [r1], #16
; CHECK-NEXT: vldrh.u16 q1, [r0], #16
; CHECK-NEXT: vqsub.s16 q0, q1, q0
; CHECK-NEXT: vstrh.16 q0, [r2], #16
; CHECK-NEXT: letp lr, .LBB1_1
; CHECK-NEXT: @ %bb.2: @ %while.end
; CHECK-NEXT: letp lr, .LBB1_2
; CHECK-NEXT: @ %bb.3: @ %while.end
; CHECK-NEXT: pop {r7, pc}
entry:
%cmp7 = icmp eq i32 %blockSize, 0
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s

; The following functions should all fail to become tail-predicated.
Expand Down Expand Up @@ -453,7 +454,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry
ret void
}

; adding 5, instead of 4, to index.
; adding 5, instead of 4, to index.
define void @wrong_index_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* noalias nocapture %c, i32 %N) {
entry:
%cmp8 = icmp eq i32 %N, 0
Expand Down
26 changes: 17 additions & 9 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ body: |
; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 0, 8, implicit-def $itstate
; CHECK: tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r5, def $pc, implicit killed $itstate
; CHECK: bb.1.vector.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $r0, $r1, $r2
; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg
; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
Expand All @@ -91,8 +94,8 @@ body: |
; CHECK: $r12 = t2MOVTi16 killed $r12, 65535, 14 /* CC::al */, $noreg
; CHECK: dead $lr = t2DLS renamable $r3
; CHECK: $r5 = tMOVr killed $r3, 14 /* CC::al */, $noreg
; CHECK: bb.1.vector.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: bb.2.vector.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $r0, $r1, $r2, $r5, $r12
; CHECK: $r3 = tMOVr $r12, 14 /* CC::al */, $noreg
; CHECK: renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg
Expand All @@ -103,8 +106,8 @@ body: |
; CHECK: early-clobber renamable $r1 = t2STRH_POST killed renamable $r3, killed renamable $r1, 2, 14 /* CC::al */, $noreg :: (store 2 into %ir.lsr.iv.2)
; CHECK: renamable $r5, dead $cpsr = nsw tSUBi8 killed $r5, 1, 14 /* CC::al */, $noreg
; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1
; CHECK: bb.2.exit:
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2
; CHECK: bb.3.exit:
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r5, def $pc
bb.0.entry:
successors: %bb.1(0x80000000)
Expand All @@ -117,6 +120,11 @@ body: |
tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2IT 0, 8, implicit-def $itstate
tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r5, def $pc, implicit killed $itstate
bb.1.vector.ph:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2, $r5, $lr
renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg
renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
Expand All @@ -127,8 +135,8 @@ body: |
t2DoLoopStart renamable $r3
$r5 = tMOVr killed $r3, 14 /* CC::al */, $noreg
bb.1.vector.body:
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $r0, $r1, $r2, $r5, $r12
$r3 = tMOVr $r12, 14 /* CC::al */, $noreg
Expand All @@ -141,10 +149,10 @@ body: |
renamable $r5, dead $cpsr = nsw tSUBi8 killed $r5, 1, 14 /* CC::al */, $noreg
renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14 /* CC::al */, $noreg
t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14 /* CC::al */, $noreg
bb.2.exit:
bb.3.exit:
tPOP_RET 14 /* CC::al */, $noreg, def $r5, def $pc
...
52 changes: 34 additions & 18 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-retaining.mir
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,14 @@ body: |
; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 11, 8, implicit-def $itstate
; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
; CHECK: bb.1.loop.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $r0, $r1, $r2, $r3
; CHECK: renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
; CHECK: dead $lr = t2DLS renamable $r4
; CHECK: $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg
; CHECK: bb.1.loop.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: bb.2.loop.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $r0, $r1, $r2, $r3, $r12
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
; CHECK: $lr = tMOVr $r12, 14 /* CC::al */, $noreg
Expand All @@ -140,8 +143,8 @@ body: |
; CHECK: renamable $q0 = MVE_VQSHRNbhs32 killed renamable $q0, killed renamable $q1, 15, 0, $noreg
; CHECK: MVE_VPST 8, implicit $vpr
; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4)
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1
; CHECK: bb.2.exit:
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2
; CHECK: bb.3.exit:
; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
bb.0.entry:
successors: %bb.1(0x80000000)
Expand All @@ -154,12 +157,17 @@ body: |
tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2IT 11, 8, implicit-def $itstate
frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
bb.1.loop.ph:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2, $r3, $r4, $lr
renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r4
$r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg
bb.1.loop.body:
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
bb.2.loop.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $r0, $r1, $r2, $r3, $r12
renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
Expand All @@ -174,10 +182,10 @@ body: |
renamable $q0 = MVE_VQSHRNbhs32 killed renamable $q0, killed renamable $q1, 15, 0, $noreg
MVE_VPST 8, implicit $vpr
renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4)
t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14 /* CC::al */, $noreg
t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14 /* CC::al */, $noreg
bb.2.exit:
bb.3.exit:
frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
...
Expand Down Expand Up @@ -221,11 +229,14 @@ body: |
; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 11, 8, implicit-def $itstate
; CHECK: frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
; CHECK: bb.1.loop.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $r0, $r1, $r2, $r3
; CHECK: renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
; CHECK: dead $lr = t2DLS renamable $r4
; CHECK: $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg
; CHECK: bb.1.loop.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: bb.2.loop.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $r0, $r1, $r2, $r3, $r12
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
; CHECK: MVE_VPST 4, implicit $vpr
Expand All @@ -238,8 +249,8 @@ body: |
; CHECK: renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg
; CHECK: MVE_VPST 8, implicit $vpr
; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4)
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1
; CHECK: bb.2.exit:
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2
; CHECK: bb.3.exit:
; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
bb.0.entry:
successors: %bb.1(0x80000000)
Expand All @@ -252,12 +263,17 @@ body: |
tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2IT 11, 8, implicit-def $itstate
frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
bb.1.loop.ph:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2, $r3, $r4, $lr
renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
t2DoLoopStart renamable $r4
$r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg
bb.1.loop.body:
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
bb.2.loop.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $r0, $r1, $r2, $r3, $r12
renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
Expand All @@ -272,10 +288,10 @@ body: |
renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg
MVE_VPST 8, implicit $vpr
renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4)
t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14 /* CC::al */, $noreg
t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14 /* CC::al */, $noreg
bb.2.exit:
bb.3.exit:
frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
...
235 changes: 154 additions & 81 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ body: |
; CHECK: t2IT 0, 4, implicit-def $itstate
; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate
; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate
; CHECK: bb.1.vector.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $lr, $r0, $r1, $r2, $r7
; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
Expand All @@ -127,8 +130,8 @@ body: |
; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: dead $lr = t2DLS renamable $r12
; CHECK: $r3 = tMOVr killed $r12, 14 /* CC::al */, $noreg
; CHECK: bb.1.vector.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: bb.2.vector.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $q1, $r0, $r1, $r2, $r3
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg
; CHECK: $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q0
Expand All @@ -140,8 +143,8 @@ body: |
; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg
; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1
; CHECK: bb.2.middle.block:
; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2
; CHECK: bb.3.middle.block:
; CHECK: liveins: $q0, $q1, $r2
; CHECK: renamable $r0, dead $cpsr = tADDi3 killed renamable $r2, 4, 14 /* CC::al */, $noreg
; CHECK: renamable $vpr = MVE_VCTP32 killed renamable $r0, 0, $noreg
Expand All @@ -156,6 +159,11 @@ body: |
t2IT 0, 4, implicit-def $itstate
renamable $r0 = tMOVi8 $noreg, 0, 0, $cpsr, implicit killed $r0, implicit $itstate
tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate
bb.1.vector.ph:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2, $lr, $r7
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
Expand All @@ -169,8 +177,8 @@ body: |
t2DoLoopStart renamable $r12
$r3 = tMOVr killed $r12, 14, $noreg
bb.1.vector.body:
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $q1, $r0, $r1, $r2, $r3
renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg
Expand All @@ -184,10 +192,10 @@ body: |
renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg
renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14, $noreg
t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.2.middle.block:
bb.3.middle.block:
liveins: $q0, $q1, $r2
renamable $r0, dead $cpsr = tADDi3 killed renamable $r2, 4, 14, $noreg
Expand Down
26 changes: 17 additions & 9 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir
Original file line number Diff line number Diff line change
Expand Up @@ -108,16 +108,19 @@ body: |
; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 11, 8, implicit-def $itstate
; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
; CHECK: bb.1.vector.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $r0, $r1, $r2, $r3
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3
; CHECK: bb.1.vector.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: bb.2.vector.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $lr, $r0, $r1, $r2
; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv13, align 4)
; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1416, align 4)
; CHECK: renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1719, align 4)
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1
; CHECK: bb.2.for.cond.cleanup:
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2
; CHECK: bb.3.for.cond.cleanup:
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
bb.0.entry:
successors: %bb.1(0x80000000)
Expand All @@ -130,15 +133,20 @@ body: |
tCMPi8 renamable $r3, 1, 14, $noreg, implicit-def $cpsr
t2IT 11, 8, implicit-def $itstate
tPOP_RET 11, killed $cpsr, def $r7, def $pc, implicit killed $itstate
bb.1.vector.ph:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2, $r3, $r7, $lr
renamable $r12 = t2ADDri renamable $r3, 3, 14, $noreg, $noreg
renamable $lr = t2MOVi 1, 14, $noreg, $noreg
renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg
renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
bb.1.vector.body:
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $lr, $r0, $r1, $r2, $r3
renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
Expand All @@ -150,10 +158,10 @@ body: |
MVE_VPST 8, implicit $vpr
renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4)
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14, $noreg
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.2.for.cond.cleanup:
bb.3.for.cond.cleanup:
tPOP_RET 14, $noreg, def $r7, def $pc
...
26 changes: 17 additions & 9 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir
Original file line number Diff line number Diff line change
Expand Up @@ -107,16 +107,19 @@ body: |
; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 11, 8, implicit-def $itstate
; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
; CHECK: bb.1.vector.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $r0, $r1, $r2, $r3
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3
; CHECK: bb.1.vector.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: bb.2.vector.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $lr, $r0, $r1, $r2
; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv13, align 4)
; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1416, align 4)
; CHECK: renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1719, align 4)
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1
; CHECK: bb.2.for.cond.cleanup:
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2
; CHECK: bb.3.for.cond.cleanup:
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
bb.0.entry:
successors: %bb.1(0x80000000)
Expand All @@ -129,15 +132,20 @@ body: |
tCMPi8 renamable $r3, 1, 14, $noreg, implicit-def $cpsr
t2IT 11, 8, implicit-def $itstate
tPOP_RET 11, killed $cpsr, def $r7, def $pc, implicit killed $itstate
bb.1.vector.ph:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2, $r3, $r7, $lr
renamable $r12 = t2ADDri renamable $r3, 3, 14, $noreg, $noreg
renamable $lr = t2MOVi 1, 14, $noreg, $noreg
renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg
renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
bb.1.vector.body:
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $lr, $r0, $r1, $r2, $r3
renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
Expand All @@ -149,10 +157,10 @@ body: |
MVE_VPST 8, implicit $vpr
renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4)
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14, $noreg
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.2.for.cond.cleanup:
bb.3.for.cond.cleanup:
tPOP_RET 14, $noreg, def $r7, def $pc
...
26 changes: 17 additions & 9 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir
Original file line number Diff line number Diff line change
Expand Up @@ -107,16 +107,19 @@ body: |
; CHECK: tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2IT 11, 8, implicit-def $itstate
; CHECK: tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
; CHECK: bb.1.vector.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $r0, $r1, $r2, $r3
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3
; CHECK: bb.1.vector.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: bb.2.vector.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $lr, $r0, $r1, $r2
; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv13, align 4)
; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1416, align 4)
; CHECK: renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1719, align 4)
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1
; CHECK: bb.2.for.cond.cleanup:
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2
; CHECK: bb.3.for.cond.cleanup:
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
bb.0.entry:
successors: %bb.1(0x80000000)
Expand All @@ -129,15 +132,20 @@ body: |
tCMPi8 renamable $r3, 1, 14, $noreg, implicit-def $cpsr
t2IT 11, 8, implicit-def $itstate
tPOP_RET 11, killed $cpsr, def $r7, def $pc, implicit killed $itstate
bb.1.vector.ph:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2, $r3, $r7, $lr
renamable $r12 = t2ADDri renamable $r3, 3, 14, $noreg, $noreg
renamable $lr = t2MOVi 1, 14, $noreg, $noreg
renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg
renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg
t2DoLoopStart renamable $lr
bb.1.vector.body:
successors: %bb.1(0x7c000000), %bb.2(0x04000000)
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $lr, $r0, $r1, $r2, $r3
renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
Expand All @@ -149,10 +157,10 @@ body: |
MVE_VPST 8, implicit $vpr
renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4)
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14, $noreg
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14, $noreg
bb.2.for.cond.cleanup:
bb.3.for.cond.cleanup:
tPOP_RET 14, $noreg, def $r7, def $pc
...
Loading