48 changes: 18 additions & 30 deletions llvm/test/CodeGen/Thumb2/mve-pred-and.ll
Original file line number Diff line number Diff line change
Expand Up @@ -575,11 +575,9 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: cmpeqz_v2i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr q2, q0, q1
; CHECK-NEXT: vmov r0, s11
; CHECK-NEXT: vmov r1, s10
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: vmov r0, r1, d5
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s9
; CHECK-NEXT: vmov r1, r2, d4
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand All @@ -604,33 +602,27 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: cmpeq_v2i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s11
; CHECK-NEXT: vmov r1, s7
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmov r1, s10
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s5
; CHECK-NEXT: vmov r0, r1, d5
; CHECK-NEXT: vmov r2, r3, d3
; CHECK-NEXT: eors r0, r2
; CHECK-NEXT: eors r1, r3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s9
; CHECK-NEXT: vmov r12, r2, d4
; CHECK-NEXT: vmov r3, r1, d2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: eors r2, r3
; CHECK-NEXT: eor.w r2, r3, r12
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand All @@ -656,29 +648,25 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @cmpeqr_v2i1(<2 x i64> %a, <2 x i64> %b, i64 %c) {
; CHECK-LABEL: cmpeqr_v2i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r2, s7
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: eors r2, r1
; CHECK-NEXT: eors r3, r0
; CHECK-NEXT: vmov r2, r3, d3
; CHECK-NEXT: eors r3, r1
; CHECK-NEXT: eors r2, r0
; CHECK-NEXT: orrs r2, r3
; CHECK-NEXT: vmov r3, s5
; CHECK-NEXT: vmov r12, r3, d2
; CHECK-NEXT: cset r2, eq
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: eors r1, r3
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: eors r0, r3
; CHECK-NEXT: eor.w r0, r0, r12
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
; CHECK-NEXT: vmov q2[3], q2[1], r0, r2
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand Down
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -389,11 +389,9 @@ define arm_aapcs_vfpcc i2 @bitcast_from_v2i1(<2 x i64> %a) {
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vmov r0, s1
; CHECK-LE-NEXT: vmov r1, s0
; CHECK-LE-NEXT: vmov r2, s2
; CHECK-LE-NEXT: vmov r0, r1, d0
; CHECK-LE-NEXT: orrs r0, r1
; CHECK-LE-NEXT: vmov r1, s3
; CHECK-LE-NEXT: vmov r1, r2, d1
; CHECK-LE-NEXT: cset r0, eq
; CHECK-LE-NEXT: orrs r1, r2
; CHECK-LE-NEXT: cset r1, eq
Expand All @@ -410,11 +408,9 @@ define arm_aapcs_vfpcc i2 @bitcast_from_v2i1(<2 x i64> %a) {
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vmov r0, s6
; CHECK-BE-NEXT: vmov r1, s7
; CHECK-BE-NEXT: vmov r2, s5
; CHECK-BE-NEXT: vmov r0, r1, d3
; CHECK-BE-NEXT: orrs r0, r1
; CHECK-BE-NEXT: vmov r1, s4
; CHECK-BE-NEXT: vmov r1, r2, d2
; CHECK-BE-NEXT: cset r0, eq
; CHECK-BE-NEXT: orrs r1, r2
; CHECK-BE-NEXT: cset r1, eq
Expand Down
58 changes: 28 additions & 30 deletions llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -46,23 +46,21 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2i64(<2 x i64> %src) {
; CHECK-LABEL: sext_v2i1_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: sbcs.w r0, r2, r0
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vmov r2, r12, d0
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: sbcs.w r0, r3, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: sbcs.w r1, r2, r1
; CHECK-NEXT: rsbs r1, r2, #0
; CHECK-NEXT: sbcs.w r1, r3, r12
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: movlt r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
Expand Down Expand Up @@ -119,29 +117,29 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @zext_v2i1_v2i64(<2 x i64> %src) {
; CHECK-LABEL: zext_v2i1_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: adr r1, .LCPI7_0
; CHECK-NEXT: vldrw.u32 q1, [r1]
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: rsbs r2, r2, #0
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: sbcs.w r1, r0, r1
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: sbcs.w r2, r0, r2
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vmov lr, r12, d0
; CHECK-NEXT: adr r2, .LCPI7_0
; CHECK-NEXT: vldrw.u32 q0, [r2]
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: sbcs.w r0, r3, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: rsbs.w r1, lr, #0
; CHECK-NEXT: sbcs.w r1, r3, r12
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI7_0:
Expand Down
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll
Original file line number Diff line number Diff line change
Expand Up @@ -318,11 +318,9 @@ entry:
define arm_aapcs_vfpcc void @store_v2i1(<2 x i1> *%dst, <2 x i64> %a) {
; CHECK-LE-LABEL: store_v2i1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vmov r1, s1
; CHECK-LE-NEXT: vmov r2, s0
; CHECK-LE-NEXT: vmov r3, s2
; CHECK-LE-NEXT: vmov r1, r2, d0
; CHECK-LE-NEXT: orrs r1, r2
; CHECK-LE-NEXT: vmov r2, s3
; CHECK-LE-NEXT: vmov r2, r3, d1
; CHECK-LE-NEXT: cset r1, eq
; CHECK-LE-NEXT: orrs r2, r3
; CHECK-LE-NEXT: cset r2, eq
Expand All @@ -337,11 +335,9 @@ define arm_aapcs_vfpcc void @store_v2i1(<2 x i1> *%dst, <2 x i64> %a) {
; CHECK-BE-LABEL: store_v2i1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vmov r1, s6
; CHECK-BE-NEXT: vmov r2, s7
; CHECK-BE-NEXT: vmov r3, s5
; CHECK-BE-NEXT: vmov r1, r2, d3
; CHECK-BE-NEXT: orrs r1, r2
; CHECK-BE-NEXT: vmov r2, s4
; CHECK-BE-NEXT: vmov r2, r3, d2
; CHECK-BE-NEXT: cset r1, eq
; CHECK-BE-NEXT: orrs r2, r3
; CHECK-BE-NEXT: cset r2, eq
Expand Down
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/Thumb2/mve-pred-not.ll
Original file line number Diff line number Diff line change
Expand Up @@ -323,11 +323,9 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: cmpeqz_v2i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand All @@ -351,11 +349,9 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: cmpeq_v2i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand Down
36 changes: 13 additions & 23 deletions llvm/test/CodeGen/Thumb2/mve-pred-or.ll
Original file line number Diff line number Diff line change
Expand Up @@ -377,25 +377,21 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: cmpeqz_v2i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: vmov r2, s4
; CHECK-NEXT: vmov r0, r1, d3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s5
; CHECK-NEXT: vmov r1, r2, d2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand All @@ -421,33 +417,27 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: cmpeq_v2i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s11
; CHECK-NEXT: vmov r1, s7
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmov r1, s10
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s5
; CHECK-NEXT: vmov r0, r1, d5
; CHECK-NEXT: vmov r2, r3, d3
; CHECK-NEXT: eors r0, r2
; CHECK-NEXT: eors r1, r3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s9
; CHECK-NEXT: vmov r12, r2, d4
; CHECK-NEXT: vmov r3, r1, d2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: eors r2, r3
; CHECK-NEXT: eor.w r2, r3, r12
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand Down
20 changes: 8 additions & 12 deletions llvm/test/CodeGen/Thumb2/mve-pred-shuffle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -459,27 +459,23 @@ define <8 x i16> @shuffle6_v4i32(<4 x i32> %src1, <4 x i32> %src2, <8 x i16> %a,
; CHECK-NEXT: vmov.i8 q2, #0xff
; CHECK-NEXT: vcmp.i32 eq, q0, zr
; CHECK-NEXT: vpsel q3, q2, q1
; CHECK-NEXT: vmov r0, s12
; CHECK-NEXT: vmov r0, r1, d6
; CHECK-NEXT: vmov.16 q0[0], r0
; CHECK-NEXT: vmov r0, s13
; CHECK-NEXT: vmov.16 q0[1], r0
; CHECK-NEXT: vmov r0, s14
; CHECK-NEXT: vmov.16 q0[1], r1
; CHECK-NEXT: vmov r0, r1, d7
; CHECK-NEXT: vmov.16 q0[2], r0
; CHECK-NEXT: vmov r0, s15
; CHECK-NEXT: vmov.16 q0[3], r0
; CHECK-NEXT: mov r0, sp
; CHECK-NEXT: vldrw.u32 q3, [r0]
; CHECK-NEXT: vmov.16 q0[3], r1
; CHECK-NEXT: vcmp.i32 eq, q3, zr
; CHECK-NEXT: vpsel q1, q2, q1
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov r0, r1, d2
; CHECK-NEXT: vmov.16 q0[4], r0
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: vmov.16 q0[5], r0
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov.16 q0[5], r1
; CHECK-NEXT: vmov r0, r1, d3
; CHECK-NEXT: vmov.16 q0[6], r0
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov.16 q0[7], r0
; CHECK-NEXT: add r0, sp, #32
; CHECK-NEXT: vmov.16 q0[7], r1
; CHECK-NEXT: vcmp.i16 ne, q0, zr
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: add r0, sp, #16
Expand Down
48 changes: 16 additions & 32 deletions llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll
Original file line number Diff line number Diff line change
Expand Up @@ -72,40 +72,34 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov r0, s11
; CHECK-NEXT: vmov r1, s10
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: vmov r0, r1, d5
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s9
; CHECK-NEXT: vmov r1, r2, d4
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: vmov r2, s4
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: vmov r0, r1, d3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s5
; CHECK-NEXT: vmov r1, r2, d2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: vbic q3, q3, q2
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand Down Expand Up @@ -202,40 +196,34 @@ define arm_aapcs_vfpcc <2 x i64> @cmpnez_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov r0, s11
; CHECK-NEXT: vmov r1, s10
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: vmov r0, r1, d5
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s9
; CHECK-NEXT: vmov r1, r2, d4
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: vmov r2, s4
; CHECK-NEXT: cset r1, ne
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: vmov r0, r1, d3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s5
; CHECK-NEXT: vmov r1, r2, d2
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: cset r1, ne
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: vbic q3, q3, q2
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand Down Expand Up @@ -441,25 +429,21 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1_i1(<2 x i64> %a, <2 x i64> %b, i64
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: vmov r2, s7
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: vmov r2, r3, d3
; CHECK-NEXT: orrs r2, r3
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: cset r2, eq
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: vmov r2, s5
; CHECK-NEXT: vmov r2, r3, d2
; CHECK-NEXT: csetm r12, ne
; CHECK-NEXT: orrs r2, r3
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: cset r2, eq
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: csetm r4, ne
; CHECK-NEXT: orrs r2, r3
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: cset r2, eq
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: csetm lr, ne
; CHECK-NEXT: orrs r2, r3
; CHECK-NEXT: cset r2, eq
Expand Down
36 changes: 13 additions & 23 deletions llvm/test/CodeGen/Thumb2/mve-pred-xor.ll
Original file line number Diff line number Diff line change
Expand Up @@ -457,25 +457,21 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: cmpeqz_v2i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: vmov r2, s4
; CHECK-NEXT: vmov r0, r1, d3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s5
; CHECK-NEXT: vmov r1, r2, d2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand All @@ -501,33 +497,27 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: cmpeq_v2i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s11
; CHECK-NEXT: vmov r1, s7
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmov r1, s10
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s5
; CHECK-NEXT: vmov r0, r1, d5
; CHECK-NEXT: vmov r2, r3, d3
; CHECK-NEXT: eors r0, r2
; CHECK-NEXT: eors r1, r3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s9
; CHECK-NEXT: vmov r12, r2, d4
; CHECK-NEXT: vmov r3, r1, d2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: eors r2, r3
; CHECK-NEXT: eor.w r2, r3, r12
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand Down
412 changes: 183 additions & 229 deletions llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll

Large diffs are not rendered by default.

190 changes: 87 additions & 103 deletions llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll
Original file line number Diff line number Diff line change
Expand Up @@ -36,48 +36,44 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2)
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r4, s4
; CHECK-NEXT: vmov r5, s0
; CHECK-NEXT: adds r2, r2, r3
; CHECK-NEXT: vmov r3, s5
; CHECK-NEXT: eor.w r12, r1, r0
; CHECK-NEXT: adcs r0, r1
; CHECK-NEXT: eors r1, r0
; CHECK-NEXT: vmov r0, r1, d3
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: adds r2, r2, r0
; CHECK-NEXT: eor.w r12, r3, r1
; CHECK-NEXT: adc.w r0, r3, r1
; CHECK-NEXT: eor.w r1, r3, r0
; CHECK-NEXT: vmov r3, r4, d0
; CHECK-NEXT: bic.w r1, r1, r12
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov lr, r1, d2
; CHECK-NEXT: cset r12, mi
; CHECK-NEXT: cmp.w r12, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: asrne r2, r0, #31
; CHECK-NEXT: adds r4, r4, r5
; CHECK-NEXT: mvn r5, #-2147483648
; CHECK-NEXT: eor.w lr, r1, r3
; CHECK-NEXT: adcs r3, r1
; CHECK-NEXT: eors r1, r3
; CHECK-NEXT: bic.w r1, r1, lr
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: cset r1, mi
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: adds.w r3, r3, lr
; CHECK-NEXT: eor.w r5, r4, r1
; CHECK-NEXT: adcs r1, r4
; CHECK-NEXT: eors r4, r1
; CHECK-NEXT: bic.w r5, r4, r5
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: cset r5, mi
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: asrne r4, r3, #31
; CHECK-NEXT: asrne r3, r1, #31
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: vmov q0[2], q0[0], r4, r2
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
; CHECK-NEXT: cset r2, mi
; CHECK-NEXT: mvn r3, #-2147483648
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: cinv r2, r5, eq
; CHECK-NEXT: cinv r2, r3, eq
; CHECK-NEXT: cmp.w r12, #0
; CHECK-NEXT: csel r0, r2, r0, ne
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: cset r2, mi
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: cinv r2, r5, eq
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csel r1, r2, r3, ne
; CHECK-NEXT: cinv r2, r3, eq
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: csel r1, r2, r1, ne
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
Expand Down Expand Up @@ -120,33 +116,29 @@ define arm_aapcs_vfpcc <2 x i64> @uadd_int64_t(<2 x i64> %src1, <2 x i64> %src2)
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: vmov r0, r1, d3
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r4, s4
; CHECK-NEXT: vmov r5, s0
; CHECK-NEXT: adds r2, r2, r3
; CHECK-NEXT: vmov r3, s5
; CHECK-NEXT: adcs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: vmov r4, r5, d0
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov r3, r2, d2
; CHECK-NEXT: adcs lr, r12, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r2, #-1
; CHECK-NEXT: adds r4, r4, r5
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: adcs r3, r12, #0
; CHECK-NEXT: movne.w r0, #-1
; CHECK-NEXT: adds r3, r3, r4
; CHECK-NEXT: adcs r2, r5
; CHECK-NEXT: adcs r5, r12, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r4, #-1
; CHECK-NEXT: movne.w r3, #-1
; CHECK-NEXT: cmp.w lr, #0
; CHECK-NEXT: vmov q0[2], q0[0], r4, r2
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r0, #-1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: vmov q0[2], q0[0], r3, r0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r1, #-1
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r2, #-1
; CHECK-NEXT: vmov q0[3], q0[1], r2, r1
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
%0 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
Expand Down Expand Up @@ -189,46 +181,42 @@ define arm_aapcs_vfpcc <2 x i64> @ssub_int64_t(<2 x i64> %src1, <2 x i64> %src2)
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r4, s4
; CHECK-NEXT: vmov r5, s0
; CHECK-NEXT: subs r2, r2, r3
; CHECK-NEXT: vmov r3, s5
; CHECK-NEXT: eor.w r12, r1, r0
; CHECK-NEXT: sbc.w r0, r1, r0
; CHECK-NEXT: eors r1, r0
; CHECK-NEXT: ands.w r1, r1, r12
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r0, r1, d3
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: eor.w r12, r3, r1
; CHECK-NEXT: sbc.w r1, r3, r1
; CHECK-NEXT: eor.w r2, r3, r1
; CHECK-NEXT: vmov r3, r4, d0
; CHECK-NEXT: ands.w r2, r2, r12
; CHECK-NEXT: vmov lr, r2, d2
; CHECK-NEXT: cset r12, mi
; CHECK-NEXT: cmp.w r12, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: asrne r2, r0, #31
; CHECK-NEXT: subs r4, r5, r4
; CHECK-NEXT: mvn r5, #-2147483648
; CHECK-NEXT: eor.w lr, r1, r3
; CHECK-NEXT: sbc.w r3, r1, r3
; CHECK-NEXT: eors r1, r3
; CHECK-NEXT: ands.w r1, r1, lr
; CHECK-NEXT: cset r1, mi
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: asrne r0, r1, #31
; CHECK-NEXT: subs.w r3, r3, lr
; CHECK-NEXT: eor.w r5, r4, r2
; CHECK-NEXT: sbc.w r2, r4, r2
; CHECK-NEXT: eors r4, r2
; CHECK-NEXT: ands r5, r4
; CHECK-NEXT: cset r5, mi
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: asrne r4, r3, #31
; CHECK-NEXT: asrne r3, r2, #31
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vmov q0[2], q0[0], r3, r0
; CHECK-NEXT: cset r0, mi
; CHECK-NEXT: mvn r3, #-2147483648
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: vmov q0[2], q0[0], r4, r2
; CHECK-NEXT: cset r2, mi
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: cinv r2, r5, eq
; CHECK-NEXT: cinv r0, r3, eq
; CHECK-NEXT: cmp.w r12, #0
; CHECK-NEXT: csel r0, r2, r0, ne
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: cset r2, mi
; CHECK-NEXT: csel r0, r0, r1, ne
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: cinv r2, r5, eq
; CHECK-NEXT: cset r1, mi
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csel r1, r2, r3, ne
; CHECK-NEXT: cinv r1, r3, eq
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: csel r1, r1, r2, ne
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
Expand Down Expand Up @@ -271,35 +259,31 @@ define arm_aapcs_vfpcc <2 x i64> @usub_int64_t(<2 x i64> %src1, <2 x i64> %src2)
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: vmov r0, r1, d3
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r4, s4
; CHECK-NEXT: vmov r5, s0
; CHECK-NEXT: subs r2, r3, r2
; CHECK-NEXT: vmov r3, s5
; CHECK-NEXT: sbcs.w r0, r1, r0
; CHECK-NEXT: adc r1, r12, #0
; CHECK-NEXT: rsbs.w lr, r1, #1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: vmov r4, r5, d0
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs.w r1, r3, r1
; CHECK-NEXT: adc r2, r12, #0
; CHECK-NEXT: rsbs.w lr, r2, #1
; CHECK-NEXT: vmov r3, r2, d2
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r2, #0
; CHECK-NEXT: subs r4, r5, r4
; CHECK-NEXT: sbcs r1, r3
; CHECK-NEXT: adc r3, r12, #0
; CHECK-NEXT: rsbs.w r3, r3, #1
; CHECK-NEXT: movne r0, #0
; CHECK-NEXT: subs r3, r4, r3
; CHECK-NEXT: sbcs.w r2, r5, r2
; CHECK-NEXT: adc r5, r12, #0
; CHECK-NEXT: rsbs.w r5, r5, #1
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
; CHECK-NEXT: movne r3, #0
; CHECK-NEXT: cmp.w lr, #0
; CHECK-NEXT: vmov q0[2], q0[0], r4, r2
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r0, #0
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: vmov q0[2], q0[0], r3, r0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r1, #0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r2, #0
; CHECK-NEXT: vmov q0[3], q0[1], r2, r1
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
%0 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2)
Expand Down
146 changes: 69 additions & 77 deletions llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,41 +21,36 @@ define arm_aapcs_vfpcc void @scatter_inc_minipred_4i32(<4 x i32> %data, i32* %ds
define arm_aapcs_vfpcc void @scatter_inc_mini_8i16(<8 x i16> %data, i16* %dst, <8 x i32> %offs) {
; CHECK-LABEL: scatter_inc_mini_8i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: vshl.i32 q1, q1, #1
; CHECK-NEXT: vmov.i32 q3, #0x10
; CHECK-NEXT: vadd.i32 q1, q1, r0
; CHECK-NEXT: vmov.u16 r2, q0[0]
; CHECK-NEXT: vadd.i32 q4, q1, q3
; CHECK-NEXT: vmov.u16 r6, q0[0]
; CHECK-NEXT: vadd.i32 q1, q1, q3
; CHECK-NEXT: vmov r1, r2, d2
; CHECK-NEXT: vmov r3, r12, d3
; CHECK-NEXT: vshl.i32 q1, q2, #1
; CHECK-NEXT: vmov r1, s16
; CHECK-NEXT: vadd.i32 q1, q1, r0
; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: vadd.i32 q1, q1, q3
; CHECK-NEXT: strh r2, [r1]
; CHECK-NEXT: vmov r0, lr, d2
; CHECK-NEXT: vmov r4, r5, d3
; CHECK-NEXT: strh r6, [r1]
; CHECK-NEXT: vmov.u16 r1, q0[1]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: strh r1, [r2]
; CHECK-NEXT: vmov.u16 r1, q0[2]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: strh r1, [r3]
; CHECK-NEXT: vmov.u16 r1, q0[3]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: strh.w r1, [r12]
; CHECK-NEXT: vmov.u16 r1, q0[4]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: vmov.u16 r1, q0[5]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov.u16 r1, q0[6]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov.u16 r1, q0[7]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
; CHECK-NEXT: vmov.u16 r0, q0[5]
; CHECK-NEXT: strh.w r0, [lr]
; CHECK-NEXT: vmov.u16 r0, q0[6]
; CHECK-NEXT: strh r0, [r4]
; CHECK-NEXT: vmov.u16 r0, q0[7]
; CHECK-NEXT: strh r0, [r5]
; CHECK-NEXT: pop {r4, r5, r6, pc}
%1 = add <8 x i32> %offs, <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
%2 = getelementptr inbounds i16, i16* %dst, <8 x i32> %1
call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %data, <8 x i16*> %2, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
Expand All @@ -65,69 +60,66 @@ define arm_aapcs_vfpcc void @scatter_inc_mini_8i16(<8 x i16> %data, i16* %dst, <
define arm_aapcs_vfpcc void @scatter_inc_mini_16i8(<16 x i8> %data, i8* %dst, <16 x i32> %offs) {
; CHECK-LABEL: scatter_inc_mini_16i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov.i32 q5, #0x10
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov.i32 q4, #0x10
; CHECK-NEXT: vadd.i32 q1, q1, r0
; CHECK-NEXT: vadd.i32 q4, q1, q5
; CHECK-NEXT: vmov.u8 r2, q0[0]
; CHECK-NEXT: vmov r1, s16
; CHECK-NEXT: vadd.i32 q1, q1, q4
; CHECK-NEXT: add r5, sp, #48
; CHECK-NEXT: vmov r1, r2, d2
; CHECK-NEXT: vadd.i32 q3, q3, r0
; CHECK-NEXT: vadd.i32 q2, q2, r0
; CHECK-NEXT: vadd.i32 q3, q3, q5
; CHECK-NEXT: vadd.i32 q2, q2, q5
; CHECK-NEXT: strb r2, [r1]
; CHECK-NEXT: add r1, sp, #32
; CHECK-NEXT: vldrw.u32 q1, [r1]
; CHECK-NEXT: vmov.u8 r1, q0[1]
; CHECK-NEXT: vmov r3, r12, d3
; CHECK-NEXT: vadd.i32 q1, q2, r0
; CHECK-NEXT: vadd.i32 q2, q1, q4
; CHECK-NEXT: vldrw.u32 q1, [r5]
; CHECK-NEXT: vmov lr, r7, d4
; CHECK-NEXT: vmov.u8 r6, q0[0]
; CHECK-NEXT: vadd.i32 q1, q1, r0
; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: vadd.i32 q1, q1, q5
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: vmov.u8 r1, q0[2]
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: vmov.u8 r1, q0[3]
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov.u8 r1, q0[4]
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s9
; CHECK-NEXT: vmov.u8 r1, q0[5]
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: vmov.u8 r1, q0[6]
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s11
; CHECK-NEXT: vmov.u8 r1, q0[7]
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s12
; CHECK-NEXT: vmov.u8 r1, q0[8]
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s13
; CHECK-NEXT: vmov r0, r8, d5
; CHECK-NEXT: vadd.i32 q2, q3, q4
; CHECK-NEXT: vmov.u8 r4, q0[4]
; CHECK-NEXT: vadd.i32 q1, q1, q4
; CHECK-NEXT: vmov.u8 r5, q0[6]
; CHECK-NEXT: strb r6, [r1]
; CHECK-NEXT: vmov.u8 r1, q0[1]
; CHECK-NEXT: strb r1, [r2]
; CHECK-NEXT: vmov.u8 r6, q0[2]
; CHECK-NEXT: vmov r1, r9, d4
; CHECK-NEXT: strb r6, [r3]
; CHECK-NEXT: vmov.u8 r3, q0[3]
; CHECK-NEXT: vmov.u8 r2, q0[8]
; CHECK-NEXT: strb.w r3, [r12]
; CHECK-NEXT: vmov r3, r6, d5
; CHECK-NEXT: strb.w r4, [lr]
; CHECK-NEXT: vmov.u8 r4, q0[5]
; CHECK-NEXT: strb r4, [r7]
; CHECK-NEXT: vmov r7, r4, d2
; CHECK-NEXT: strb r5, [r0]
; CHECK-NEXT: vmov.u8 r0, q0[7]
; CHECK-NEXT: strb.w r0, [r8]
; CHECK-NEXT: vmov r0, r5, d3
; CHECK-NEXT: strb r2, [r1]
; CHECK-NEXT: vmov.u8 r1, q0[9]
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s14
; CHECK-NEXT: strb.w r1, [r9]
; CHECK-NEXT: vmov.u8 r1, q0[10]
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s15
; CHECK-NEXT: strb r1, [r3]
; CHECK-NEXT: vmov.u8 r1, q0[11]
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: strb r1, [r6]
; CHECK-NEXT: vmov.u8 r1, q0[12]
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: strb r1, [r7]
; CHECK-NEXT: vmov.u8 r1, q0[13]
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: strb r1, [r4]
; CHECK-NEXT: vmov.u8 r1, q0[14]
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov.u8 r1, q0[15]
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr
; CHECK-NEXT: vmov.u8 r0, q0[15]
; CHECK-NEXT: strb r0, [r5]
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
%1 = add <16 x i32> %offs, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%2 = getelementptr inbounds i8, i8* %dst, <16 x i32> %1
call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %data, <16 x i8*> %2, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
Expand Down
146 changes: 69 additions & 77 deletions llvm/test/CodeGen/Thumb2/mve-scatter-ind16-scaled.ll
Original file line number Diff line number Diff line change
Expand Up @@ -51,37 +51,35 @@ entry:
define arm_aapcs_vfpcc void @scaled_v8i16_sext(i16* %base, <8 x i16>* %offptr, <8 x i16> %input) {
; CHECK-LABEL: scaled_v8i16_sext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.s32 q2, [r1]
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: vldrh.s32 q1, [r1]
; CHECK-NEXT: vmov.u16 r6, q0[0]
; CHECK-NEXT: vshl.i32 q1, q1, #1
; CHECK-NEXT: vadd.i32 q1, q1, r0
; CHECK-NEXT: vmov r2, r3, d2
; CHECK-NEXT: vmov r12, lr, d3
; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
; CHECK-NEXT: vmov.u16 r1, q0[0]
; CHECK-NEXT: vshl.i32 q2, q2, #1
; CHECK-NEXT: vshl.i32 q1, q1, #1
; CHECK-NEXT: vadd.i32 q2, q2, r0
; CHECK-NEXT: vadd.i32 q1, q1, r0
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s9
; CHECK-NEXT: vmov.u16 r1, q0[1]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: vmov.u16 r1, q0[2]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s11
; CHECK-NEXT: vmov.u16 r1, q0[3]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov.u16 r1, q0[4]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: vmov.u16 r1, q0[5]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov.u16 r1, q0[6]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov.u16 r1, q0[7]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: vmov r0, r1, d2
; CHECK-NEXT: vmov r4, r5, d3
; CHECK-NEXT: strh r6, [r2]
; CHECK-NEXT: vmov.u16 r2, q0[1]
; CHECK-NEXT: strh r2, [r3]
; CHECK-NEXT: vmov.u16 r2, q0[2]
; CHECK-NEXT: strh.w r2, [r12]
; CHECK-NEXT: vmov.u16 r2, q0[3]
; CHECK-NEXT: strh.w r2, [lr]
; CHECK-NEXT: vmov.u16 r2, q0[4]
; CHECK-NEXT: strh r2, [r0]
; CHECK-NEXT: vmov.u16 r0, q0[5]
; CHECK-NEXT: strh r0, [r1]
; CHECK-NEXT: vmov.u16 r0, q0[6]
; CHECK-NEXT: strh r0, [r4]
; CHECK-NEXT: vmov.u16 r0, q0[7]
; CHECK-NEXT: strh r0, [r5]
; CHECK-NEXT: pop {r4, r5, r6, pc}
entry:
%offs = load <8 x i16>, <8 x i16>* %offptr, align 2
%offs.sext = sext <8 x i16> %offs to <8 x i32>
Expand All @@ -99,27 +97,23 @@ define arm_aapcs_vfpcc void @scaled_v8f16_sext(i16* %base, <8 x i16>* %offptr, <
; CHECK-NEXT: vshl.i32 q2, q1, #1
; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
; CHECK-NEXT: vadd.i32 q2, q2, r0
; CHECK-NEXT: vmov r1, s8
; CHECK-NEXT: vshl.i32 q1, q1, #1
; CHECK-NEXT: vmov r1, r2, d4
; CHECK-NEXT: vstr.16 s0, [r1]
; CHECK-NEXT: vmov r1, s9
; CHECK-NEXT: vstr.16 s12, [r2]
; CHECK-NEXT: vmov r1, r2, d5
; CHECK-NEXT: vmovx.f16 s8, s1
; CHECK-NEXT: vadd.i32 q1, q1, r0
; CHECK-NEXT: vstr.16 s12, [r1]
; CHECK-NEXT: vmov r1, s10
; CHECK-NEXT: vmovx.f16 s0, s3
; CHECK-NEXT: vstr.16 s1, [r1]
; CHECK-NEXT: vmov r1, s11
; CHECK-NEXT: vmovx.f16 s8, s1
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vstr.16 s8, [r1]
; CHECK-NEXT: vstr.16 s2, [r0]
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: vstr.16 s8, [r2]
; CHECK-NEXT: vmov r0, r1, d2
; CHECK-NEXT: vmovx.f16 s8, s2
; CHECK-NEXT: vstr.16 s8, [r0]
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vstr.16 s2, [r0]
; CHECK-NEXT: vstr.16 s8, [r1]
; CHECK-NEXT: vmov r0, r1, d3
; CHECK-NEXT: vmovx.f16 s0, s3
; CHECK-NEXT: vstr.16 s3, [r0]
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: vstr.16 s0, [r1]
; CHECK-NEXT: bx lr
entry:
%offs = load <8 x i16>, <8 x i16>* %offptr, align 2
Expand Down Expand Up @@ -180,40 +174,38 @@ entry:
define arm_aapcs_vfpcc void @scaled_v8i16_i16_2gep(i16* %base, <8 x i16>* %offptr, <8 x i16> %input) {
; CHECK-LABEL: scaled_v8i16_i16_2gep:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
; CHECK-NEXT: vldrh.s32 q3, [r1]
; CHECK-NEXT: vmov.i32 q2, #0x28
; CHECK-NEXT: vmov.u16 r1, q0[0]
; CHECK-NEXT: vshl.i32 q1, q1, #1
; CHECK-NEXT: vshl.i32 q3, q3, #1
; CHECK-NEXT: vadd.i32 q1, q1, r0
; CHECK-NEXT: vadd.i32 q3, q3, r0
; CHECK-NEXT: vadd.i32 q1, q1, q2
; CHECK-NEXT: vadd.i32 q2, q3, q2
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s9
; CHECK-NEXT: vmov.u16 r1, q0[1]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: vmov.u16 r1, q0[2]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s11
; CHECK-NEXT: vmov.u16 r1, q0[3]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov.u16 r1, q0[4]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: vmov.u16 r1, q0[5]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov.u16 r1, q0[6]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov.u16 r1, q0[7]
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: vldrh.s32 q2, [r1]
; CHECK-NEXT: vmov.i32 q1, #0x28
; CHECK-NEXT: vmov.u16 r6, q0[0]
; CHECK-NEXT: vshl.i32 q2, q2, #1
; CHECK-NEXT: vadd.i32 q2, q2, r0
; CHECK-NEXT: vadd.i32 q2, q2, q1
; CHECK-NEXT: vmov r2, r3, d4
; CHECK-NEXT: vmov r12, lr, d5
; CHECK-NEXT: vldrh.s32 q2, [r1, #8]
; CHECK-NEXT: vshl.i32 q2, q2, #1
; CHECK-NEXT: vadd.i32 q2, q2, r0
; CHECK-NEXT: vadd.i32 q1, q2, q1
; CHECK-NEXT: vmov r0, r1, d2
; CHECK-NEXT: vmov r4, r5, d3
; CHECK-NEXT: strh r6, [r2]
; CHECK-NEXT: vmov.u16 r2, q0[1]
; CHECK-NEXT: strh r2, [r3]
; CHECK-NEXT: vmov.u16 r2, q0[2]
; CHECK-NEXT: strh.w r2, [r12]
; CHECK-NEXT: vmov.u16 r2, q0[3]
; CHECK-NEXT: strh.w r2, [lr]
; CHECK-NEXT: vmov.u16 r2, q0[4]
; CHECK-NEXT: strh r2, [r0]
; CHECK-NEXT: vmov.u16 r0, q0[5]
; CHECK-NEXT: strh r0, [r1]
; CHECK-NEXT: vmov.u16 r0, q0[6]
; CHECK-NEXT: strh r0, [r4]
; CHECK-NEXT: vmov.u16 r0, q0[7]
; CHECK-NEXT: strh r0, [r5]
; CHECK-NEXT: pop {r4, r5, r6, pc}
entry:
%offs = load <8 x i16>, <8 x i16>* %offptr, align 2
%ptrs = getelementptr inbounds i16, i16* %base, <8 x i16> %offs
Expand Down
348 changes: 162 additions & 186 deletions llvm/test/CodeGen/Thumb2/mve-scatter-ind16-unscaled.ll

Large diffs are not rendered by default.

22 changes: 10 additions & 12 deletions llvm/test/CodeGen/Thumb2/mve-scatter-ind32-scaled.ll
Original file line number Diff line number Diff line change
Expand Up @@ -236,24 +236,22 @@ entry:
define arm_aapcs_vfpcc void @ext_scaled_i16_i32_2gep(i16* %base, <4 x i32>* %offptr, <4 x i32> %input) {
; CHECK-LABEL: ext_scaled_i16_i32_2gep:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vldrw.u32 q2, [r1]
; CHECK-NEXT: vmov.i32 q1, #0xa
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r1, r3, d0
; CHECK-NEXT: vshl.i32 q2, q2, #1
; CHECK-NEXT: vmov r4, r5, d1
; CHECK-NEXT: vadd.i32 q2, q2, r0
; CHECK-NEXT: vadd.i32 q1, q2, q1
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov r0, r12, d2
; CHECK-NEXT: vmov r2, lr, d3
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: strh.w r3, [r12]
; CHECK-NEXT: strh r4, [r2]
; CHECK-NEXT: strh.w r5, [lr]
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
%offs = load <4 x i32>, <4 x i32>* %offptr, align 4
%ptrs = getelementptr inbounds i16, i16* %base, <4 x i32> %offs
Expand Down
44 changes: 20 additions & 24 deletions llvm/test/CodeGen/Thumb2/mve-scatter-ind32-unscaled.ll
Original file line number Diff line number Diff line change
Expand Up @@ -361,21 +361,19 @@ entry:
define arm_aapcs_vfpcc void @trunc_signed_unscaled_i16_i8(i8* %base, <4 x i8>* %offptr, <4 x i16> %input) {
; CHECK-LABEL: trunc_signed_unscaled_i16_i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vldrb.s32 q1, [r1]
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r1, r3, d0
; CHECK-NEXT: vmov r4, r5, d1
; CHECK-NEXT: vadd.i32 q1, q1, r0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov r0, r12, d2
; CHECK-NEXT: vmov r2, lr, d3
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: strb.w r3, [r12]
; CHECK-NEXT: strb r4, [r2]
; CHECK-NEXT: strb.w r5, [lr]
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
%offs = load <4 x i8>, <4 x i8>* %offptr, align 1
%offs.sext = sext <4 x i8> %offs to <4 x i32>
Expand All @@ -388,21 +386,19 @@ entry:
define arm_aapcs_vfpcc void @trunc_unsigned_unscaled_i16_i8(i8* %base, <4 x i8>* %offptr, <4 x i16> %input) {
; CHECK-LABEL: trunc_unsigned_unscaled_i16_i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vldrb.u32 q1, [r1]
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r1, r3, d0
; CHECK-NEXT: vmov r4, r5, d1
; CHECK-NEXT: vadd.i32 q1, q1, r0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov r0, r12, d2
; CHECK-NEXT: vmov r2, lr, d3
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: strb r1, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: strb.w r3, [r12]
; CHECK-NEXT: strb r4, [r2]
; CHECK-NEXT: strb.w r5, [lr]
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
%offs = load <4 x i8>, <4 x i8>* %offptr, align 1
%offs.zext = zext <4 x i8> %offs to <4 x i32>
Expand Down
634 changes: 292 additions & 342 deletions llvm/test/CodeGen/Thumb2/mve-scatter-ind8-unscaled.ll

Large diffs are not rendered by default.

502 changes: 224 additions & 278 deletions llvm/test/CodeGen/Thumb2/mve-scatter-ptrs.ll

Large diffs are not rendered by default.

75 changes: 30 additions & 45 deletions llvm/test/CodeGen/Thumb2/mve-sext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,12 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @sext_v2i64_v2i64_v2i35(<2 x i64> %m) {
; CHECK-LABEL: sext_v2i64_v2i64_v2i35:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: sbfx r0, r0, #0, #3
; CHECK-NEXT: sbfx r1, r1, #0, #3
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
; CHECK-NEXT: sbfx r0, r1, #0, #3
; CHECK-NEXT: sbfx r1, r3, #0, #3
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: bx lr
entry:
%shl = shl <2 x i64> %m, <i64 29, i64 29>
Expand Down Expand Up @@ -480,22 +477,18 @@ define arm_aapcs_vfpcc <8 x i16> @trunc_v8i32_v8i16(<8 x i32> %src) {
; CHECK-LABEL: trunc_v8i32_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q2, q0
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov r0, r1, d4
; CHECK-NEXT: vmov.16 q0[0], r0
; CHECK-NEXT: vmov r0, s9
; CHECK-NEXT: vmov.16 q0[1], r0
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: vmov.16 q0[1], r1
; CHECK-NEXT: vmov r0, r1, d5
; CHECK-NEXT: vmov.16 q0[2], r0
; CHECK-NEXT: vmov r0, s11
; CHECK-NEXT: vmov.16 q0[3], r0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov.16 q0[3], r1
; CHECK-NEXT: vmov r0, r1, d2
; CHECK-NEXT: vmov.16 q0[4], r0
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: vmov.16 q0[5], r0
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov.16 q0[5], r1
; CHECK-NEXT: vmov r0, r1, d3
; CHECK-NEXT: vmov.16 q0[6], r0
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov.16 q0[7], r0
; CHECK-NEXT: vmov.16 q0[7], r1
; CHECK-NEXT: bx lr
entry:
%0 = trunc <8 x i32> %src to <8 x i16>
Expand All @@ -508,38 +501,30 @@ define arm_aapcs_vfpcc <16 x i8> @trunc_v16i32_v16i8(<16 x i32> %src) {
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: vmov.8 q0[0], r0
; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: vmov.8 q0[1], r0
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: vmov.8 q0[1], r1
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: vmov.8 q0[2], r0
; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: vmov.8 q0[3], r0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov.8 q0[3], r1
; CHECK-NEXT: vmov r0, r1, d2
; CHECK-NEXT: vmov.8 q0[4], r0
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: vmov.8 q0[5], r0
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov.8 q0[5], r1
; CHECK-NEXT: vmov r0, r1, d3
; CHECK-NEXT: vmov.8 q0[6], r0
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov.8 q0[7], r0
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov.8 q0[7], r1
; CHECK-NEXT: vmov r0, r1, d4
; CHECK-NEXT: vmov.8 q0[8], r0
; CHECK-NEXT: vmov r0, s9
; CHECK-NEXT: vmov.8 q0[9], r0
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: vmov.8 q0[9], r1
; CHECK-NEXT: vmov r0, r1, d5
; CHECK-NEXT: vmov.8 q0[10], r0
; CHECK-NEXT: vmov r0, s11
; CHECK-NEXT: vmov.8 q0[11], r0
; CHECK-NEXT: vmov r0, s12
; CHECK-NEXT: vmov.8 q0[11], r1
; CHECK-NEXT: vmov r0, r1, d6
; CHECK-NEXT: vmov.8 q0[12], r0
; CHECK-NEXT: vmov r0, s13
; CHECK-NEXT: vmov.8 q0[13], r0
; CHECK-NEXT: vmov r0, s14
; CHECK-NEXT: vmov.8 q0[13], r1
; CHECK-NEXT: vmov r0, r1, d7
; CHECK-NEXT: vmov.8 q0[14], r0
; CHECK-NEXT: vmov r0, s15
; CHECK-NEXT: vmov.8 q0[15], r0
; CHECK-NEXT: vmov.8 q0[15], r1
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
entry:
Expand Down
86 changes: 37 additions & 49 deletions llvm/test/CodeGen/Thumb2/mve-shifts.ll
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,17 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @shl_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: shl_qq_int64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r5, lr}
; CHECK-NEXT: push {r5, lr}
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r2, r1, d1
; CHECK-NEXT: lsll r2, r1, r0
; CHECK-NEXT: vmov r12, s4
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: lsll r0, r3, r12
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: vmov r0, r5, d0
; CHECK-NEXT: lsll r0, r5, r3
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
; CHECK-NEXT: bx lr
; CHECK-NEXT: vmov q0[3], q0[1], r5, r1
; CHECK-NEXT: pop {r5, pc}
entry:
%0 = shl <2 x i64> %src1, %src2
ret <2 x i64> %0
Expand Down Expand Up @@ -91,12 +91,10 @@ define arm_aapcs_vfpcc <2 x i64> @shru_qq_int64_t(<2 x i64> %src1, <2 x i64> %sr
; CHECK-NEXT: push {r5, lr}
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vmov r5, s3
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: vmov r0, r5, d1
; CHECK-NEXT: rsbs r2, r2, #0
; CHECK-NEXT: lsll r0, r5, r2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: lsll r2, r3, r1
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
Expand Down Expand Up @@ -144,17 +142,17 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @shrs_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: shrs_qq_int64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r5, lr}
; CHECK-NEXT: push {r5, lr}
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r2, r1, d1
; CHECK-NEXT: asrl r2, r1, r0
; CHECK-NEXT: vmov r12, s4
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: asrl r0, r3, r12
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: vmov r0, r5, d0
; CHECK-NEXT: asrl r0, r5, r3
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
; CHECK-NEXT: bx lr
; CHECK-NEXT: vmov q0[3], q0[1], r5, r1
; CHECK-NEXT: pop {r5, pc}
entry:
%0 = ashr <2 x i64> %src1, %src2
ret <2 x i64> %0
Expand Down Expand Up @@ -194,11 +192,9 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @shl_qi_int64_t(<2 x i64> %src1) {
; CHECK-LABEL: shl_qi_int64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: lsll r0, r1, #4
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: lsll r2, r3, #4
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
Expand Down Expand Up @@ -242,11 +238,9 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @shru_qi_int64_t(<2 x i64> %src1) {
; CHECK-LABEL: shru_qi_int64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: lsrl r0, r1, #4
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: lsrl r2, r3, #4
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
Expand Down Expand Up @@ -290,11 +284,9 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @shrs_qi_int64_t(<2 x i64> %src1) {
; CHECK-LABEL: shrs_qi_int64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: asrl r0, r1, #4
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: asrl r2, r3, #4
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
Expand Down Expand Up @@ -344,11 +336,9 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @shl_qr_int64_t(<2 x i64> %src1, i64 %src2) {
; CHECK-LABEL: shl_qr_int64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r12, s2
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: vmov r12, r1, d1
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: lsll r12, r1, r0
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: lsll r2, r3, r0
; CHECK-NEXT: vmov q0[2], q0[0], r2, r12
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
Expand Down Expand Up @@ -403,16 +393,16 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @shru_qr_int64_t(<2 x i64> %src1, i64 %src2) {
; CHECK-LABEL: shru_qr_int64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: rsb.w r12, r0, #0
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: lsll r2, r1, r12
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: lsll r0, r3, r12
; CHECK-NEXT: .save {r5, lr}
; CHECK-NEXT: push {r5, lr}
; CHECK-NEXT: rsbs r3, r0, #0
; CHECK-NEXT: vmov r2, r1, d1
; CHECK-NEXT: vmov r0, r5, d0
; CHECK-NEXT: lsll r2, r1, r3
; CHECK-NEXT: lsll r0, r5, r3
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
; CHECK-NEXT: bx lr
; CHECK-NEXT: vmov q0[3], q0[1], r5, r1
; CHECK-NEXT: pop {r5, pc}
entry:
%i = insertelement <2 x i64> undef, i64 %src2, i32 0
%s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
Expand Down Expand Up @@ -463,11 +453,9 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @shrs_qr_int64_t(<2 x i64> %src1, i64 %src2) {
; CHECK-LABEL: shrs_qr_int64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r12, s2
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: vmov r12, r1, d1
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: asrl r12, r1, r0
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: asrl r2, r3, r0
; CHECK-NEXT: vmov q0[2], q0[0], r2, r12
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/Thumb2/mve-shuffle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1678,8 +1678,7 @@ entry:
define arm_aapcs_vfpcc i64 @extract_i64_0(<2 x i64> %a) {
; CHECK-LABEL: extract_i64_0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r0, r1, d0
; CHECK-NEXT: bx lr
entry:
%res = extractelement <2 x i64> %a, i32 0
Expand All @@ -1689,8 +1688,7 @@ entry:
define arm_aapcs_vfpcc i64 @extract_i64_1(<2 x i64> %a) {
; CHECK-LABEL: extract_i64_1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: bx lr
entry:
%res = extractelement <2 x i64> %a, i32 1
Expand Down
94 changes: 41 additions & 53 deletions llvm/test/CodeGen/Thumb2/mve-simple-arith.ll
Original file line number Diff line number Diff line change
Expand Up @@ -35,23 +35,19 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @add_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: add_int64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: adds.w lr, r3, r2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: adc.w r12, r1, r0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov r1, s5
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
; CHECK-NEXT: vmov q0[3], q0[1], r1, r12
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov lr, r12, d3
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: vmov r1, r0, d2
; CHECK-NEXT: vmov r4, r5, d0
; CHECK-NEXT: adds.w r2, r2, lr
; CHECK-NEXT: adc.w r3, r3, r12
; CHECK-NEXT: adds r1, r1, r4
; CHECK-NEXT: adcs r0, r5
; CHECK-NEXT: vmov q0[2], q0[0], r1, r2
; CHECK-NEXT: vmov q0[3], q0[1], r0, r3
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
%0 = add nsw <2 x i64> %src1, %src2
ret <2 x i64> %0
Expand Down Expand Up @@ -172,23 +168,19 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @sub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: sub_int64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s7
; CHECK-NEXT: subs.w lr, r3, r2
; CHECK-NEXT: vmov r2, s4
; CHECK-NEXT: vmov r3, s5
; CHECK-NEXT: sbc.w r12, r1, r0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbc.w r1, r3, r1
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
; CHECK-NEXT: vmov q0[3], q0[1], r1, r12
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov lr, r12, d1
; CHECK-NEXT: vmov r2, r3, d3
; CHECK-NEXT: vmov r1, r0, d0
; CHECK-NEXT: vmov r4, r5, d2
; CHECK-NEXT: subs.w r2, r2, lr
; CHECK-NEXT: sbc.w r3, r3, r12
; CHECK-NEXT: subs r1, r4, r1
; CHECK-NEXT: sbc.w r0, r5, r0
; CHECK-NEXT: vmov q0[2], q0[0], r1, r2
; CHECK-NEXT: vmov q0[3], q0[1], r0, r3
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
%0 = sub nsw <2 x i64> %src2, %src1
ret <2 x i64> %0
Expand Down Expand Up @@ -309,25 +301,21 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @mul_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: mul_int64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r2, s7
; CHECK-NEXT: umull r12, r3, r1, r0
; CHECK-NEXT: mla lr, r1, r2, r3
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r2, s5
; CHECK-NEXT: umull r4, r5, r1, r3
; CHECK-NEXT: mla r1, r1, r2, r5
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: mla r0, r2, r0, lr
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: vmov q0[2], q0[0], r4, r12
; CHECK-NEXT: mla r1, r2, r3, r1
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: vmov r0, r1, d2
; CHECK-NEXT: vmov r2, lr, d0
; CHECK-NEXT: vmov r4, r5, d3
; CHECK-NEXT: umull r12, r3, r2, r0
; CHECK-NEXT: mla r1, r2, r1, r3
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: mla r0, lr, r0, r1
; CHECK-NEXT: umull r6, r7, r2, r4
; CHECK-NEXT: mla r2, r2, r5, r7
; CHECK-NEXT: vmov q0[2], q0[0], r12, r6
; CHECK-NEXT: mla r2, r3, r4, r2
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%0 = mul nsw <2 x i64> %src1, %src2
ret <2 x i64> %0
Expand Down
102 changes: 56 additions & 46 deletions llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -91,37 +91,47 @@ entry:
}

define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK-LE-LABEL: vector_add_i64:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: .save {r7, lr}
; CHECK-LE-NEXT: push {r7, lr}
; CHECK-LE-NEXT: add.w r12, sp, #8
; CHECK-LE-NEXT: vldrw.u32 q0, [r12]
; CHECK-LE-NEXT: vmov lr, s0
; CHECK-LE-NEXT: vmov r12, s1
; CHECK-LE-NEXT: adds.w r0, r0, lr
; CHECK-LE-NEXT: vmov lr, s2
; CHECK-LE-NEXT: adc.w r1, r1, r12
; CHECK-LE-NEXT: vmov r12, s3
; CHECK-LE-NEXT: adds.w r2, r2, lr
; CHECK-LE-NEXT: adc.w r3, r3, r12
; CHECK-LE-NEXT: pop {r7, pc}
; CHECK-MVE-LABEL: vector_add_i64:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .save {r7, lr}
; CHECK-MVE-NEXT: push {r7, lr}
; CHECK-MVE-NEXT: add.w r12, sp, #8
; CHECK-MVE-NEXT: vldrw.u32 q0, [r12]
; CHECK-MVE-NEXT: vmov r12, lr, d0
; CHECK-MVE-NEXT: adds.w r0, r0, r12
; CHECK-MVE-NEXT: adc.w r1, r1, lr
; CHECK-MVE-NEXT: vmov r12, lr, d1
; CHECK-MVE-NEXT: adds.w r2, r2, r12
; CHECK-MVE-NEXT: adc.w r3, r3, lr
; CHECK-MVE-NEXT: pop {r7, pc}
;
; CHECK-BE-LABEL: vector_add_i64:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: .save {r7, lr}
; CHECK-BE-NEXT: push {r7, lr}
; CHECK-BE-NEXT: add.w r12, sp, #8
; CHECK-BE-NEXT: vldrw.u32 q0, [r12]
; CHECK-BE-NEXT: vmov lr, s1
; CHECK-BE-NEXT: vmov r12, s0
; CHECK-BE-NEXT: vmov r12, lr, d0
; CHECK-BE-NEXT: adds.w r1, r1, lr
; CHECK-BE-NEXT: vmov lr, s3
; CHECK-BE-NEXT: adc.w r0, r0, r12
; CHECK-BE-NEXT: vmov r12, s2
; CHECK-BE-NEXT: vmov r12, lr, d1
; CHECK-BE-NEXT: adds.w r3, r3, lr
; CHECK-BE-NEXT: adc.w r2, r2, r12
; CHECK-BE-NEXT: pop {r7, pc}
;
; CHECK-FP-LABEL: vector_add_i64:
; CHECK-FP: @ %bb.0: @ %entry
; CHECK-FP-NEXT: .save {r4, r5, r7, lr}
; CHECK-FP-NEXT: push {r4, r5, r7, lr}
; CHECK-FP-NEXT: add.w r12, sp, #16
; CHECK-FP-NEXT: vldrw.u32 q0, [r12]
; CHECK-FP-NEXT: vmov r12, lr, d0
; CHECK-FP-NEXT: vmov r4, r5, d1
; CHECK-FP-NEXT: adds.w r0, r0, r12
; CHECK-FP-NEXT: adc.w r1, r1, lr
; CHECK-FP-NEXT: adds r2, r2, r4
; CHECK-FP-NEXT: adcs r3, r5
; CHECK-FP-NEXT: pop {r4, r5, r7, pc}
entry:
%sum = add <2 x i64> %lhs, %rhs
ret <2 x i64> %sum
Expand Down Expand Up @@ -338,67 +348,67 @@ entry:
define <4 x float> @vector_add_f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK-MVE-LABEL: vector_add_f32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .save {r7, lr}
; CHECK-MVE-NEXT: push {r7, lr}
; CHECK-MVE-NEXT: .save {r4, r5, r7, lr}
; CHECK-MVE-NEXT: push {r4, r5, r7, lr}
; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11, d12, d13}
; CHECK-MVE-NEXT: vmov d11, r2, r3
; CHECK-MVE-NEXT: vmov d10, r0, r1
; CHECK-MVE-NEXT: add r1, sp, #56
; CHECK-MVE-NEXT: vldrw.u32 q6, [r1]
; CHECK-MVE-NEXT: vmov r0, s23
; CHECK-MVE-NEXT: vmov r1, s27
; CHECK-MVE-NEXT: vmov d13, r2, r3
; CHECK-MVE-NEXT: vmov d12, r0, r1
; CHECK-MVE-NEXT: add r1, sp, #64
; CHECK-MVE-NEXT: vldrw.u32 q5, [r1]
; CHECK-MVE-NEXT: vmov r4, r0, d13
; CHECK-MVE-NEXT: vmov r5, r1, d11
; CHECK-MVE-NEXT: bl __aeabi_fadd
; CHECK-MVE-NEXT: vmov s19, r0
; CHECK-MVE-NEXT: vmov r0, s22
; CHECK-MVE-NEXT: vmov r1, s26
; CHECK-MVE-NEXT: mov r0, r4
; CHECK-MVE-NEXT: mov r1, r5
; CHECK-MVE-NEXT: bl __aeabi_fadd
; CHECK-MVE-NEXT: vmov s18, r0
; CHECK-MVE-NEXT: vmov r0, s21
; CHECK-MVE-NEXT: vmov r1, s25
; CHECK-MVE-NEXT: vmov r4, r0, d12
; CHECK-MVE-NEXT: vmov r5, r1, d10
; CHECK-MVE-NEXT: bl __aeabi_fadd
; CHECK-MVE-NEXT: vmov s17, r0
; CHECK-MVE-NEXT: vmov r0, s20
; CHECK-MVE-NEXT: vmov r1, s24
; CHECK-MVE-NEXT: mov r0, r4
; CHECK-MVE-NEXT: mov r1, r5
; CHECK-MVE-NEXT: bl __aeabi_fadd
; CHECK-MVE-NEXT: vmov s16, r0
; CHECK-MVE-NEXT: vmov r2, r3, d9
; CHECK-MVE-NEXT: vmov r0, r1, d8
; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-MVE-NEXT: pop {r7, pc}
; CHECK-MVE-NEXT: pop {r4, r5, r7, pc}
;
; CHECK-BE-LABEL: vector_add_f32:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: .save {r7, lr}
; CHECK-BE-NEXT: push {r7, lr}
; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
; CHECK-BE-NEXT: push {r4, r5, r7, lr}
; CHECK-BE-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-BE-NEXT: vpush {d8, d9, d10, d11, d12, d13}
; CHECK-BE-NEXT: vmov d1, r3, r2
; CHECK-BE-NEXT: vmov d0, r1, r0
; CHECK-BE-NEXT: add r1, sp, #56
; CHECK-BE-NEXT: add r1, sp, #64
; CHECK-BE-NEXT: vldrw.u32 q6, [r1]
; CHECK-BE-NEXT: vrev64.32 q5, q0
; CHECK-BE-NEXT: vmov r0, s23
; CHECK-BE-NEXT: vmov r1, s27
; CHECK-BE-NEXT: vmov r4, r0, d11
; CHECK-BE-NEXT: vmov r5, r1, d13
; CHECK-BE-NEXT: bl __aeabi_fadd
; CHECK-BE-NEXT: vmov s19, r0
; CHECK-BE-NEXT: vmov r0, s22
; CHECK-BE-NEXT: vmov r1, s26
; CHECK-BE-NEXT: mov r0, r4
; CHECK-BE-NEXT: mov r1, r5
; CHECK-BE-NEXT: bl __aeabi_fadd
; CHECK-BE-NEXT: vmov s18, r0
; CHECK-BE-NEXT: vmov r0, s21
; CHECK-BE-NEXT: vmov r1, s25
; CHECK-BE-NEXT: vmov r4, r0, d10
; CHECK-BE-NEXT: vmov r5, r1, d12
; CHECK-BE-NEXT: bl __aeabi_fadd
; CHECK-BE-NEXT: vmov s17, r0
; CHECK-BE-NEXT: vmov r0, s20
; CHECK-BE-NEXT: vmov r1, s24
; CHECK-BE-NEXT: mov r0, r4
; CHECK-BE-NEXT: mov r1, r5
; CHECK-BE-NEXT: bl __aeabi_fadd
; CHECK-BE-NEXT: vmov s16, r0
; CHECK-BE-NEXT: vrev64.32 q0, q4
; CHECK-BE-NEXT: vmov r1, r0, d0
; CHECK-BE-NEXT: vmov r3, r2, d1
; CHECK-BE-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-BE-NEXT: pop {r7, pc}
; CHECK-BE-NEXT: pop {r4, r5, r7, pc}
;
; CHECK-FP-LABEL: vector_add_f32:
; CHECK-FP: @ %bb.0: @ %entry
Expand Down
34 changes: 17 additions & 17 deletions llvm/test/CodeGen/Thumb2/mve-vabd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,42 +5,42 @@
define arm_aapcs_vfpcc void @vabd_v4f32(<4 x float> %x, <4 x float> %y, <4 x float>* %z) {
; CHECK-MVE-LABEL: vabd_v4f32:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .save {r4, r5, r6, r7, lr}
; CHECK-MVE-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-MVE-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
; CHECK-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
; CHECK-MVE-NEXT: .pad #4
; CHECK-MVE-NEXT: sub sp, #4
; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11}
; CHECK-MVE-NEXT: vmov q4, q1
; CHECK-MVE-NEXT: vmov q5, q0
; CHECK-MVE-NEXT: mov r4, r0
; CHECK-MVE-NEXT: vmov r0, s20
; CHECK-MVE-NEXT: vmov r1, s16
; CHECK-MVE-NEXT: mov r8, r0
; CHECK-MVE-NEXT: vmov r0, r6, d10
; CHECK-MVE-NEXT: vmov r1, r7, d8
; CHECK-MVE-NEXT: bl __aeabi_fsub
; CHECK-MVE-NEXT: mov r5, r0
; CHECK-MVE-NEXT: vmov r0, s21
; CHECK-MVE-NEXT: vmov r1, s17
; CHECK-MVE-NEXT: mov r9, r0
; CHECK-MVE-NEXT: mov r0, r6
; CHECK-MVE-NEXT: mov r1, r7
; CHECK-MVE-NEXT: bl __aeabi_fsub
; CHECK-MVE-NEXT: mov r6, r0
; CHECK-MVE-NEXT: vmov r0, s22
; CHECK-MVE-NEXT: vmov r1, s18
; CHECK-MVE-NEXT: vmov r0, r7, d11
; CHECK-MVE-NEXT: vmov r1, r4, d9
; CHECK-MVE-NEXT: bl __aeabi_fsub
; CHECK-MVE-NEXT: mov r7, r0
; CHECK-MVE-NEXT: vmov r0, s23
; CHECK-MVE-NEXT: vmov r1, s19
; CHECK-MVE-NEXT: mov r5, r0
; CHECK-MVE-NEXT: mov r0, r7
; CHECK-MVE-NEXT: mov r1, r4
; CHECK-MVE-NEXT: bl __aeabi_fsub
; CHECK-MVE-NEXT: bic r0, r0, #-2147483648
; CHECK-MVE-NEXT: vmov s3, r0
; CHECK-MVE-NEXT: bic r0, r7, #-2147483648
; CHECK-MVE-NEXT: bic r0, r5, #-2147483648
; CHECK-MVE-NEXT: vmov s2, r0
; CHECK-MVE-NEXT: bic r0, r6, #-2147483648
; CHECK-MVE-NEXT: vmov s1, r0
; CHECK-MVE-NEXT: bic r0, r5, #-2147483648
; CHECK-MVE-NEXT: bic r0, r9, #-2147483648
; CHECK-MVE-NEXT: vmov s0, r0
; CHECK-MVE-NEXT: vstrw.32 q0, [r4]
; CHECK-MVE-NEXT: vstrw.32 q0, [r8]
; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11}
; CHECK-MVE-NEXT: add sp, #4
; CHECK-MVE-NEXT: pop {r4, r5, r6, r7, pc}
; CHECK-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
;
; CHECK-MVEFP-LABEL: vabd_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
Expand Down
112 changes: 48 additions & 64 deletions llvm/test/CodeGen/Thumb2/mve-vabdus.ll
Original file line number Diff line number Diff line change
Expand Up @@ -159,43 +159,35 @@ define arm_aapcs_vfpcc <4 x i32> @vabd_u32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-NEXT: vmov.f32 s14, s3
; CHECK-NEXT: vand q2, q2, q4
; CHECK-NEXT: vand q3, q3, q4
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: vmov r3, s12
; CHECK-NEXT: vmov r0, s9
; CHECK-NEXT: vmov r1, s13
; CHECK-NEXT: vmov r0, r1, d4
; CHECK-NEXT: vmov r2, r3, d6
; CHECK-NEXT: vmov.f32 s6, s5
; CHECK-NEXT: vmov.f32 s2, s1
; CHECK-NEXT: vand q1, q1, q4
; CHECK-NEXT: vand q4, q0, q4
; CHECK-NEXT: subs r2, r3, r2
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: sbc.w r0, r1, r0
; CHECK-NEXT: add.w r1, r2, r0, asr #31
; CHECK-NEXT: vmov r2, s17
; CHECK-NEXT: eor.w r12, r1, r0, asr #31
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vmov r1, s5
; CHECK-NEXT: subs r0, r0, r3
; CHECK-NEXT: vmov r3, s14
; CHECK-NEXT: sbc.w r1, r2, r1
; CHECK-NEXT: vmov r2, s10
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbc.w r1, r3, r1
; CHECK-NEXT: add.w r0, r0, r1, asr #31
; CHECK-NEXT: eor.w r0, r0, r1, asr #31
; CHECK-NEXT: vmov r1, s15
; CHECK-NEXT: eor.w r12, r0, r1, asr #31
; CHECK-NEXT: vmov r1, r2, d2
; CHECK-NEXT: vmov r3, r0, d8
; CHECK-NEXT: subs r1, r3, r1
; CHECK-NEXT: sbcs r0, r2
; CHECK-NEXT: vmov r2, r3, d7
; CHECK-NEXT: add.w r1, r1, r0, asr #31
; CHECK-NEXT: eor.w r0, r1, r0, asr #31
; CHECK-NEXT: vmov q0[2], q0[0], r0, r12
; CHECK-NEXT: vmov r0, s11
; CHECK-NEXT: subs r2, r3, r2
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: sbc.w r0, r1, r0
; CHECK-NEXT: add.w r1, r2, r0, asr #31
; CHECK-NEXT: vmov r2, s19
; CHECK-NEXT: eor.w r12, r1, r0, asr #31
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: vmov r1, s7
; CHECK-NEXT: subs r0, r0, r3
; CHECK-NEXT: sbc.w r1, r2, r1
; CHECK-NEXT: vmov r0, r1, d5
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbc.w r1, r3, r1
; CHECK-NEXT: add.w r0, r0, r1, asr #31
; CHECK-NEXT: eor.w r0, r0, r1, asr #31
; CHECK-NEXT: eor.w r12, r0, r1, asr #31
; CHECK-NEXT: vmov r1, r2, d3
; CHECK-NEXT: vmov r3, r0, d9
; CHECK-NEXT: subs r1, r3, r1
; CHECK-NEXT: sbcs r0, r2
; CHECK-NEXT: add.w r1, r1, r0, asr #31
; CHECK-NEXT: eor.w r0, r1, r0, asr #31
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
Expand Down Expand Up @@ -548,57 +540,49 @@ define void @vabd_loop_u32(i32* nocapture readonly %x, i32* nocapture readonly %
; CHECK-NEXT: vmov.f32 s14, s9
; CHECK-NEXT: vand q4, q3, q0
; CHECK-NEXT: vldrw.u32 q3, [r0], #16
; CHECK-NEXT: vmov r3, s16
; CHECK-NEXT: vmov r3, r4, d8
; CHECK-NEXT: vmov.f32 s20, s12
; CHECK-NEXT: vmov.f32 s22, s13
; CHECK-NEXT: vand q5, q5, q0
; CHECK-NEXT: vmov r4, s17
; CHECK-NEXT: vmov r6, s20
; CHECK-NEXT: vmov r5, s21
; CHECK-NEXT: vmov r7, s23
; CHECK-NEXT: subs.w r8, r6, r3
; CHECK-NEXT: vmov r3, s22
; CHECK-NEXT: sbc.w r4, r5, r4
; CHECK-NEXT: vmov r6, s19
; CHECK-NEXT: vmov r5, r6, d10
; CHECK-NEXT: subs.w r8, r5, r3
; CHECK-NEXT: vmov r7, r3, d11
; CHECK-NEXT: sbc.w r4, r6, r4
; CHECK-NEXT: asrs r5, r4, #31
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: bfi r4, r5, #0, #4
; CHECK-NEXT: vmov r5, s18
; CHECK-NEXT: vmov r5, r6, d9
; CHECK-NEXT: vmov.f32 s16, s10
; CHECK-NEXT: vmov.f32 s18, s11
; CHECK-NEXT: vand q2, q4, q0
; CHECK-NEXT: vmov.f32 s16, s14
; CHECK-NEXT: vmov.f32 s18, s15
; CHECK-NEXT: vand q3, q4, q0
; CHECK-NEXT: vmov r12, s12
; CHECK-NEXT: subs.w r9, r3, r5
; CHECK-NEXT: vmov r5, s14
; CHECK-NEXT: sbc.w r3, r7, r6
; CHECK-NEXT: movs r7, #1
; CHECK-NEXT: vmov r6, s15
; CHECK-NEXT: subs.w r9, r7, r5
; CHECK-NEXT: mov.w r7, #1
; CHECK-NEXT: sbcs r3, r6
; CHECK-NEXT: and.w r3, r7, r3, asr #31
; CHECK-NEXT: vmov r7, s10
; CHECK-NEXT: vmov r7, r5, d7
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: bfi r4, r3, #4, #4
; CHECK-NEXT: vmov r3, s11
; CHECK-NEXT: subs.w r10, r5, r7
; CHECK-NEXT: vmov r7, s9
; CHECK-NEXT: vmov r5, s13
; CHECK-NEXT: sbc.w r3, r6, r3
; CHECK-NEXT: vmov r6, s8
; CHECK-NEXT: asr.w r11, r3, #31
; CHECK-NEXT: subs.w r6, r12, r6
; CHECK-NEXT: sbc.w r7, r5, r7
; CHECK-NEXT: asrs r7, r7, #31
; CHECK-NEXT: vmov q2[2], q2[0], r7, r11
; CHECK-NEXT: vmov r7, s8
; CHECK-NEXT: vmov r3, r6, d5
; CHECK-NEXT: subs.w r10, r7, r3
; CHECK-NEXT: vmov r7, r3, d4
; CHECK-NEXT: sbcs r5, r6
; CHECK-NEXT: vmov r6, r12, d6
; CHECK-NEXT: asr.w r11, r5, #31
; CHECK-NEXT: subs r6, r6, r7
; CHECK-NEXT: sbc.w r3, r12, r3
; CHECK-NEXT: asrs r3, r3, #31
; CHECK-NEXT: vmov q2[2], q2[0], r3, r11
; CHECK-NEXT: vmov r3, s8
; CHECK-NEXT: vmov q2[2], q2[0], r8, r6
; CHECK-NEXT: vmov q2[3], q2[1], r9, r10
; CHECK-NEXT: and r7, r7, #1
; CHECK-NEXT: rsbs r7, r7, #0
; CHECK-NEXT: bfi r4, r7, #8, #4
; CHECK-NEXT: movs r7, #1
; CHECK-NEXT: and.w r3, r7, r3, asr #31
; CHECK-NEXT: and r3, r3, #1
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: bfi r4, r3, #8, #4
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: and.w r3, r3, r5, asr #31
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: bfi r4, r3, #12, #4
; CHECK-NEXT: vmsr p0, r4
Expand Down
24 changes: 10 additions & 14 deletions llvm/test/CodeGen/Thumb2/mve-vaddv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,10 @@ declare i8 @llvm.vector.reduce.add.i8.v32i8(<32 x i8>)
define arm_aapcs_vfpcc i64 @vaddv_v2i64_i64(<2 x i64> %s1) {
; CHECK-LABEL: vaddv_v2i64_i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: adds r0, r0, r3
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: bx lr
entry:
%r = call i64 @llvm.vector.reduce.add.i64.v2i64(<2 x i64> %s1)
Expand Down Expand Up @@ -92,14 +90,12 @@ define arm_aapcs_vfpcc i64 @vaddva_v2i64_i64(<2 x i64> %s1, i64 %x) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: vmov r12, s3
; CHECK-NEXT: vmov lr, s1
; CHECK-NEXT: adds r2, r2, r3
; CHECK-NEXT: adc.w r3, lr, r12
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: vmov lr, r12, d1
; CHECK-NEXT: vmov r3, r2, d0
; CHECK-NEXT: adds.w r3, r3, lr
; CHECK-NEXT: adc.w r2, r2, r12
; CHECK-NEXT: adds r0, r0, r3
; CHECK-NEXT: adcs r1, r2
; CHECK-NEXT: pop {r7, pc}
entry:
%t = call i64 @llvm.vector.reduce.add.i64.v2i64(<2 x i64> %s1)
Expand Down
42 changes: 16 additions & 26 deletions llvm/test/CodeGen/Thumb2/mve-vcmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -367,22 +367,18 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vcmp_eq_v2i64(<2 x i64> %src, <2 x i64> %srcb, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: vcmp_eq_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: vmov r0, r1, d3
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: eors r0, r2
; CHECK-NEXT: eors r1, r3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s5
; CHECK-NEXT: vmov r12, r2, d2
; CHECK-NEXT: vmov r3, r1, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s4
; CHECK-NEXT: eors r2, r3
; CHECK-NEXT: eor.w r2, r3, r12
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: cmp r1, #0
Expand All @@ -402,22 +398,18 @@ entry:
define arm_aapcs_vfpcc <2 x i32> @vcmp_eq_v2i32(<2 x i64> %src, <2 x i64> %srcb, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: vcmp_eq_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: vmov r0, r1, d3
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: eors r0, r2
; CHECK-NEXT: eors r1, r3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s5
; CHECK-NEXT: vmov r12, r2, d2
; CHECK-NEXT: vmov r3, r1, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s4
; CHECK-NEXT: eors r2, r3
; CHECK-NEXT: eor.w r2, r3, r12
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: cmp r1, #0
Expand All @@ -441,12 +433,10 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand Down
60 changes: 24 additions & 36 deletions llvm/test/CodeGen/Thumb2/mve-vcmpr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -433,18 +433,16 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vcmp_eq_v2i64(<2 x i64> %src, i64 %src2, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: vcmp_eq_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: eors r2, r1
; CHECK-NEXT: eors r3, r0
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: eors r3, r1
; CHECK-NEXT: eors r2, r0
; CHECK-NEXT: orrs r2, r3
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: vmov r12, r3, d0
; CHECK-NEXT: cset r2, eq
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: eors r1, r3
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: eors r0, r3
; CHECK-NEXT: eor.w r0, r0, r12
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
Expand All @@ -466,18 +464,16 @@ entry:
define arm_aapcs_vfpcc <2 x i32> @vcmp_eq_v2i32(<2 x i64> %src, i64 %src2, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: vcmp_eq_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: eors r2, r1
; CHECK-NEXT: eors r3, r0
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: eors r3, r1
; CHECK-NEXT: eors r2, r0
; CHECK-NEXT: orrs r2, r3
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: vmov r12, r3, d0
; CHECK-NEXT: cset r2, eq
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: eors r1, r3
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: eors r0, r3
; CHECK-NEXT: eor.w r0, r0, r12
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
Expand All @@ -503,12 +499,10 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand Down Expand Up @@ -1014,18 +1008,16 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vcmp_r_eq_v2i64(<2 x i64> %src, i64 %src2, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: vcmp_r_eq_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: eors r2, r1
; CHECK-NEXT: eors r3, r0
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: eors r3, r1
; CHECK-NEXT: eors r2, r0
; CHECK-NEXT: orrs r2, r3
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: vmov r12, r3, d0
; CHECK-NEXT: cset r2, eq
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: eors r1, r3
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: eors r0, r3
; CHECK-NEXT: eor.w r0, r0, r12
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
Expand All @@ -1047,18 +1039,16 @@ entry:
define arm_aapcs_vfpcc <2 x i32> @vcmp_r_eq_v2i32(<2 x i64> %src, i64 %src2, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: vcmp_r_eq_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: eors r2, r1
; CHECK-NEXT: eors r3, r0
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: eors r3, r1
; CHECK-NEXT: eors r2, r0
; CHECK-NEXT: orrs r2, r3
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: vmov r12, r3, d0
; CHECK-NEXT: cset r2, eq
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: eors r1, r3
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: eors r0, r3
; CHECK-NEXT: eor.w r0, r0, r12
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
Expand All @@ -1084,12 +1074,10 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_r_multi_v2i32(<2 x i64> %a, <2 x i32> %b,
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand Down
24 changes: 8 additions & 16 deletions llvm/test/CodeGen/Thumb2/mve-vcmpz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -361,11 +361,9 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vcmp_eqz_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: vcmp_eqz_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand All @@ -388,11 +386,9 @@ entry:
define arm_aapcs_vfpcc <2 x i32> @vcmp_eqz_v2i32(<2 x i64> %src, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: vcmp_eqz_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand Down Expand Up @@ -775,11 +771,9 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vcmp_r_eqz_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: vcmp_r_eqz_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand All @@ -802,11 +796,9 @@ entry:
define arm_aapcs_vfpcc <2 x i32> @vcmp_r_eqz_v2i32(<2 x i64> %src, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: vcmp_r_eqz_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r1, r2, d0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
Expand Down
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/Thumb2/mve-vcvt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -266,11 +266,9 @@ define arm_aapcs_vfpcc <2 x double> @foo_float_int64(<2 x i64> %src) {
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: vmov r1, s19
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl __aeabi_l2d
; CHECK-NEXT: vmov r2, s16
; CHECK-NEXT: vmov r3, s17
; CHECK-NEXT: vmov r2, r3, d8
; CHECK-NEXT: vmov d9, r0, r1
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
Expand All @@ -292,11 +290,9 @@ define arm_aapcs_vfpcc <2 x double> @foo_float_uint64(<2 x i64> %src) {
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: vmov r1, s19
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl __aeabi_ul2d
; CHECK-NEXT: vmov r2, s16
; CHECK-NEXT: vmov r3, s17
; CHECK-NEXT: vmov r2, r3, d8
; CHECK-NEXT: vmov d9, r0, r1
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
Expand Down
1,270 changes: 629 additions & 641 deletions llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll

Large diffs are not rendered by default.

1,354 changes: 608 additions & 746 deletions llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll

Large diffs are not rendered by default.

240 changes: 96 additions & 144 deletions llvm/test/CodeGen/Thumb2/mve-vecreduce-bit.ll

Large diffs are not rendered by default.

64 changes: 28 additions & 36 deletions llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
define i32 @mul_i32(i32* nocapture readonly %x, i32 %n) {
; CHECK-LABEL: mul_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: movs r2, #1
; CHECK-NEXT: cmp r1, #1
; CHECK-NEXT: blt .LBB1_8
Expand All @@ -119,14 +119,12 @@ define i32 @mul_i32(i32* nocapture readonly %x, i32 %n) {
; CHECK-NEXT: vmul.i32 q0, q1, q0
; CHECK-NEXT: le lr, .LBB1_4
; CHECK-NEXT: @ %bb.5: @ %middle.block
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: vmov lr, r3, d1
; CHECK-NEXT: cmp r12, r1
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: mul lr, r3, r2
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r2, r4, d0
; CHECK-NEXT: mul r3, lr, r3
; CHECK-NEXT: mul r2, r4, r2
; CHECK-NEXT: mul r2, r3, r2
; CHECK-NEXT: mul r2, r2, lr
; CHECK-NEXT: beq .LBB1_8
; CHECK-NEXT: .LBB1_6: @ %for.body.preheader1
; CHECK-NEXT: sub.w lr, r1, r12
Expand All @@ -138,7 +136,7 @@ define i32 @mul_i32(i32* nocapture readonly %x, i32 %n) {
; CHECK-NEXT: le lr, .LBB1_7
; CHECK-NEXT: .LBB1_8: @ %for.cond.cleanup
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: pop {r4, pc}
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
Expand Down Expand Up @@ -190,8 +188,8 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
define i32 @and_i32(i32* nocapture readonly %x, i32 %n) {
; CHECK-LABEL: and_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: cmp r1, #1
; CHECK-NEXT: blt .LBB2_3
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
Expand All @@ -217,13 +215,11 @@ define i32 @and_i32(i32* nocapture readonly %x, i32 %n) {
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: le lr, .LBB2_5
; CHECK-NEXT: @ %bb.6: @ %middle.block
; CHECK-NEXT: vmov r12, s3
; CHECK-NEXT: vmov lr, r12, d1
; CHECK-NEXT: cmp r3, r1
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov lr, s1
; CHECK-NEXT: and.w r12, r12, r2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: and.w r2, r2, lr
; CHECK-NEXT: vmov r2, r4, d0
; CHECK-NEXT: and.w r12, r12, lr
; CHECK-NEXT: and.w r2, r2, r4
; CHECK-NEXT: and.w r2, r2, r12
; CHECK-NEXT: beq .LBB2_9
; CHECK-NEXT: .LBB2_7: @ %for.body.preheader1
Expand All @@ -236,7 +232,7 @@ define i32 @and_i32(i32* nocapture readonly %x, i32 %n) {
; CHECK-NEXT: le lr, .LBB2_8
; CHECK-NEXT: .LBB2_9: @ %for.cond.cleanup
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: pop {r4, pc}
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
Expand Down Expand Up @@ -288,8 +284,8 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
define i32 @or_i32(i32* nocapture readonly %x, i32 %n) {
; CHECK-LABEL: or_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: cmp r1, #1
; CHECK-NEXT: blt .LBB3_3
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
Expand All @@ -315,13 +311,11 @@ define i32 @or_i32(i32* nocapture readonly %x, i32 %n) {
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: le lr, .LBB3_5
; CHECK-NEXT: @ %bb.6: @ %middle.block
; CHECK-NEXT: vmov r12, s3
; CHECK-NEXT: vmov lr, r12, d1
; CHECK-NEXT: cmp r3, r1
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov lr, s1
; CHECK-NEXT: orr.w r12, r12, r2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: orr.w r2, r2, lr
; CHECK-NEXT: vmov r2, r4, d0
; CHECK-NEXT: orr.w r12, r12, lr
; CHECK-NEXT: orr.w r2, r2, r4
; CHECK-NEXT: orr.w r2, r2, r12
; CHECK-NEXT: beq .LBB3_9
; CHECK-NEXT: .LBB3_7: @ %for.body.preheader1
Expand All @@ -334,7 +328,7 @@ define i32 @or_i32(i32* nocapture readonly %x, i32 %n) {
; CHECK-NEXT: le lr, .LBB3_8
; CHECK-NEXT: .LBB3_9: @ %for.cond.cleanup
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: pop {r4, pc}
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
Expand Down Expand Up @@ -386,8 +380,8 @@ for.cond.cleanup: ; preds = %for.body, %middle.b
define i32 @xor_i32(i32* nocapture readonly %x, i32 %n) {
; CHECK-LABEL: xor_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: cmp r1, #1
; CHECK-NEXT: blt .LBB4_3
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
Expand All @@ -413,13 +407,11 @@ define i32 @xor_i32(i32* nocapture readonly %x, i32 %n) {
; CHECK-NEXT: veor q0, q1, q0
; CHECK-NEXT: le lr, .LBB4_5
; CHECK-NEXT: @ %bb.6: @ %middle.block
; CHECK-NEXT: vmov r12, s3
; CHECK-NEXT: vmov lr, r12, d1
; CHECK-NEXT: cmp r3, r1
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov lr, s1
; CHECK-NEXT: eor.w r12, r12, r2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: eor.w r2, r2, lr
; CHECK-NEXT: vmov r2, r4, d0
; CHECK-NEXT: eor.w r12, r12, lr
; CHECK-NEXT: eor.w r2, r2, r4
; CHECK-NEXT: eor.w r2, r2, r12
; CHECK-NEXT: beq .LBB4_9
; CHECK-NEXT: .LBB4_7: @ %for.body.preheader1
Expand All @@ -432,7 +424,7 @@ define i32 @xor_i32(i32* nocapture readonly %x, i32 %n) {
; CHECK-NEXT: le lr, .LBB4_8
; CHECK-NEXT: .LBB4_9: @ %for.cond.cleanup
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: pop {r4, pc}
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
Expand Down
Loading